In [None]:
%%bash
# Install the Super Mario Bros gym environment, pinned to 7.4.0 so the
# notebook keeps resolving the same release on a fresh kernel.
pip install gym-super-mario-bros==7.4.0

Collecting gym-super-mario-bros==7.4.0
  Downloading gym_super_mario_bros-7.4.0-py3-none-any.whl (199 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 199.1/199.1 kB 4.6 MB/s eta 0:00:00
Collecting nes-py>=8.1.4 (from gym-super-mario-bros==7.4.0)
  Downloading nes_py-8.2.1.tar.gz (77 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 77.7/77.7 kB 6.9 MB/s eta 0:00:00
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting pyglet<=1.5.21,>=1.4.0 (from nes-py>=8.1.4->gym-super-mario-bros==7.4.0)
  Downloading pyglet-1.5.21-py3-none-any.whl (1.1 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.1/1.1 MB 16.1 MB/s eta 0:00:00
Building wheels for collected packages: nes-py
  Building wheel for nes-py (setup.py): started
  Building wheel for nes-py (setup.py): finished with status 'done'
  Created wheel for nes-py: filename=nes_py-8.2.1-cp310-cp310-linux_x86_64.whl size=535718 sha256=3dd7c4efb1a9de9623156f707aa7835bcdba9ecaba9bf023de

In [None]:
%%bash
# Replay-buffer dependencies. Pinned to the versions this notebook was last
# run with (see the captured install output) so a fresh kernel reproduces
# the same environment instead of picking up whatever is newest.
pip install tensordict==0.2.1 torchrl==0.2.1

Collecting tensordict
  Downloading tensordict-0.2.1-cp310-cp310-manylinux1_x86_64.whl (986 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 986.5/986.5 kB 9.8 MB/s eta 0:00:00
Installing collected packages: tensordict
Successfully installed tensordict-0.2.1
Collecting torchrl
  Downloading torchrl-0.2.1-cp310-cp310-manylinux1_x86_64.whl (5.5 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 5.5/5.5 MB 25.0 MB/s eta 0:00:00
Installing collected packages: torchrl
Successfully installed torchrl-0.2.1


In [None]:
import torch
from torch import nn
from torchvision import transforms as T
from PIL import Image
import numpy as np
from pathlib import Path
from collections import deque
import random, datetime, os, copy
import gc
# Gym is an OpenAI toolkit for RL
import gym
from gym.spaces import Box
from gym.wrappers import FrameStack
from tqdm import tqdm
from collections import deque
import gc
from torchrl.data import TensorDictReplayBuffer, LazyMemmapStorage
import time
import matplotlib.pyplot as plt
from tensordict import TensorDict



# NES Emulator for OpenAI Gym
from nes_py.wrappers import JoypadSpace

# Super Mario environment for OpenAI Gym
import gym_super_mario_bros

In [None]:

class SkipFrame(gym.Wrapper):
    """Repeat each action for `skip` consecutive frames and return only the last one.

    The rewards of the skipped frames are summed so no reward signal is lost.
    """

    def __init__(self, env, skip):
        """Wrap `env` so that only every `skip`-th frame is returned.

        Args:
            env: The environment to wrap.
            skip: Number of frames each action is repeated for; must be >= 1.

        Raises:
            ValueError: If `skip` is smaller than 1 (no frame would ever be
                produced by `step`).
        """
        super().__init__(env)
        if skip < 1:
            raise ValueError(f"skip must be >= 1, got: {skip}")
        self._skip = skip

    def step(self, action):
        """Repeat `action` for up to `skip` frames and sum the rewards.

        Args:
            action: The action forwarded unchanged to the wrapped environment.

        Returns:
            The `(obs, total_reward, done, trunk, info)` tuple of the last
            frame that was actually stepped, with `total_reward` being the sum
            of the rewards of all frames stepped during this call.
        """
        total_reward = 0.0
        for _ in range(self._skip):
            # Accumulate reward and repeat the same action
            obs, reward, done, trunk, info = self.env.step(action)
            total_reward += reward
            # Stop as soon as the episode ends, whether it terminated or was
            # truncated: the environment must be reset before stepping again.
            if done or trunk:
                break
        return obs, total_reward, done, trunk, info

In [None]:
class MarioNet(nn.Module):
    """Convolutional Q-network holding an `online` copy and a frozen `target` copy.

    Both copies share the same architecture: three conv layers followed by two
    linear layers that output one value per action.
    """

    def __init__(self, input_dim, output_dim):
        """Build the online network and a frozen deep copy used as target.

        Args:
            input_dim: `(channels, height, width)` of one input state; height
                and width must both be 84.
            output_dim: Number of outputs of the final linear layer.

        Raises:
            ValueError: If the height or the width is not 84.
        """
        super().__init__()
        c, h, w = input_dim

        if h != 84:
            raise ValueError(f"Expecting input height: 84, got: {h}")
        if w != 84:
            raise ValueError(f"Expecting input width: 84, got: {w}")

        self.online = nn.Sequential(
            nn.Conv2d(in_channels=c, out_channels=32, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1),
            nn.ReLU(),
            nn.Flatten(),
            # 3136 = 64 channels * 7 * 7, the conv output size for 84x84 input.
            nn.Linear(3136, 512),
            nn.ReLU(),
            nn.Linear(512, output_dim),
        )

        self.target = copy.deepcopy(self.online)

        # Q_target parameters are frozen.
        for p in self.target.parameters():
            p.requires_grad = False

    def forward(self, input, model):
        """Run `input` through the selected copy of the network.

        Args:
            input: Batch of states accepted by the convolutional stack.
            model: Either `"online"` or `"target"`.

        Returns:
            The output of the selected network for `input`.

        Raises:
            ValueError: If `model` is neither `"online"` nor `"target"`
                (previously this silently returned `None`).
        """
        if model == "online":
            return self.online(input)
        if model == "target":
            return self.target(input)
        raise ValueError(f"model must be 'online' or 'target', got: {model!r}")

In [None]:
class GrayScaleObservation(gym.ObservationWrapper):
    """Observation wrapper that converts colour frames to grayscale tensors."""

    def __init__(self, env):
        """Wrap `env` and declare an observation space without the channel axis.

        Args:
            env: The environment to wrap; its observation space shape is
                sliced with `[:2]` to build the new space.
        """
        super().__init__(env)
        obs_shape = self.observation_space.shape[:2]
        # NOTE(review): the declared space is uint8, but `observation` returns
        # a float tensor produced by `T.Grayscale` - confirm downstream
        # wrappers do not rely on the declared dtype.
        self.observation_space = Box(low=0, high=255, shape=obs_shape, dtype=np.uint8)
        # Built once here instead of on every frame; the transform is stateless.
        self._to_grayscale = T.Grayscale()

    def permute_orientation(self, observation):
        """Convert an array to a float tensor with its last axis moved first.

        Args:
            observation: Array with three axes, transposed with `(2, 0, 1)`.

        Returns:
            A `torch.float` tensor holding a copy of the transposed data.
        """
        # permute [H, W, C] array to [C, H, W] tensor
        observation = np.transpose(observation, (2, 0, 1))
        observation = torch.tensor(observation.copy(), dtype=torch.float)
        return observation

    def observation(self, observation):
        """Return the grayscale version of `observation` as a float tensor."""
        observation = self.permute_orientation(observation)
        return self._to_grayscale(observation)

In [None]:
class ResizeObservation(gym.ObservationWrapper):
    """Observation wrapper that resizes frames and divides pixel values by 255."""

    def __init__(self, env, shape):
        """Wrap `env` so observations are resized to `shape`.

        Args:
            env: The environment to wrap.
            shape: Target size; an int `n` is expanded to `(n, n)`, any other
                value is converted with `tuple(shape)`.
        """
        super().__init__(env)
        if isinstance(shape, int):
            self.shape = (shape, shape)
        else:
            self.shape = tuple(shape)

        obs_shape = self.shape + self.observation_space.shape[2:]
        # NOTE(review): the declared space is uint8 in [0, 255], while
        # `observation` returns values divided by 255 - confirm nothing
        # downstream depends on the declared bounds.
        self.observation_space = Box(low=0, high=255, shape=obs_shape, dtype=np.uint8)

        # Built once here instead of on every frame; the pipeline only depends
        # on `self.shape`, which is fixed after construction.
        # Normalize(0, 255) computes (x - 0) / 255.
        self._transforms = T.Compose(
            [T.Resize(self.shape, antialias=True), T.Normalize(0, 255)]
        )

    def observation(self, observation):
        """Resize and scale `observation`, then drop a leading axis of size 1."""
        return self._transforms(observation).squeeze(0)

In [None]:

class Mario:
    """Double-DQN agent: epsilon-greedy acting, replay memory and Q-learning updates.

    The online network picks the best next action and the frozen target
    network evaluates it (see `td_target`).
    """

    def __init__(self, state_dim, action_dim, save_dir):
        """Create the networks, replay buffer, optimizer and hyper-parameters.

        Args:
            state_dim: `(channels, height, width)` passed on to `MarioNet`.
            action_dim: Number of discrete actions.
            save_dir: Directory (path-like supporting `/`) for checkpoints.
        """
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.save_dir = save_dir

        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        # Mario's DNN to predict the most optimal action
        self.net = MarioNet(self.state_dim, self.action_dim).float()
        self.net = self.net.to(device=self.device)

        # Epsilon-greedy schedule: multiplied by the decay on every `act` call
        # and never allowed below the minimum.
        self.exploration_rate = 1
        self.exploration_rate_decay = 0.99999975
        self.exploration_rate_min = 0.1
        self.curr_step = 0

        self.save_every = 5e5  # no. of experiences between saving Mario Net
        self.memory = TensorDictReplayBuffer(storage=LazyMemmapStorage(100000, device=torch.device("cpu")))
        self.batch_size = 32
        self.gamma = 0.9
        self.optimizer = torch.optim.Adam(self.net.parameters(), lr=0.00025)
        self.loss_fn = torch.nn.SmoothL1Loss()
        self.burnin = 1e4  # min. experiences before training
        self.learn_every = 3  # no. of experiences between updates to Q_online
        self.sync_every = 1e4  # no. of experiences between Q_target & Q_online sync

    def act(self, state):
        """Choose an epsilon-greedy action and advance the step counter.

        Args:
            state: Current observation, or a tuple whose first element is the
                observation; it must provide `__array__()`.

        Returns:
            The index of the chosen action as a Python int.
        """
        # EXPLORE
        if np.random.rand() < self.exploration_rate:
            action_idx = np.random.randint(self.action_dim)

        # EXPLOIT
        else:
            state = state[0].__array__() if isinstance(state, tuple) else state.__array__()
            state = torch.tensor(state, device=self.device).unsqueeze(0)
            # Pure inference: no gradient is needed to pick an action, so skip
            # building an autograd graph on every environment step.
            with torch.no_grad():
                action_values = self.net(state, model="online")
            action_idx = torch.argmax(action_values, axis=1).item()

        # decrease exploration_rate
        self.exploration_rate *= self.exploration_rate_decay
        self.exploration_rate = max(self.exploration_rate_min, self.exploration_rate)

        # increment step
        self.curr_step += 1
        return action_idx

    def cache(self, state, next_state, action, reward, done):
        """Store one transition in the replay buffer.

        Args:
            state: Observation before the action (or a tuple starting with it).
            next_state: Observation after the action (or a tuple starting with it).
            action: Action index that was taken.
            reward: Reward received for the action.
            done: Whether the episode ended with this transition.
        """
        def first_if_tuple(x):
            return x[0] if isinstance(x, tuple) else x
        state = first_if_tuple(state).__array__()
        next_state = first_if_tuple(next_state).__array__()

        state = torch.tensor(state)
        next_state = torch.tensor(next_state)
        # Scalars are stored as length-1 tensors; `recall` removes that axis.
        action = torch.tensor([action])
        reward = torch.tensor([reward])
        done = torch.tensor([done])

        self.memory.add(TensorDict({"state": state, "next_state": next_state, "action": action, "reward": reward, "done": done}, batch_size=[]))

    def recall(self):
        """Sample `batch_size` transitions from memory and move them to the device.

        Returns:
            `(state, next_state, action, reward, done)` where the last three
            have the trailing length-1 axis added by `cache` removed.
        """
        batch = self.memory.sample(self.batch_size).to(self.device)
        state, next_state, action, reward, done = (batch.get(key) for key in ("state", "next_state", "action", "reward", "done"))
        # Drop only the trailing axis added in `cache`; a bare squeeze() would
        # also collapse the batch axis when the batch holds a single element.
        return state, next_state, action.squeeze(-1), reward.squeeze(-1), done.squeeze(-1)

    def td_estimate(self, state, action):
        """Return Q_online(s, a) for every transition in the batch.

        Args:
            state: Batch of states with `batch_size` rows.
            action: Batch of action indices, one per row.
        """
        state = state.to(self.device)
        action = action.to(self.device)
        current_Q = self.net(state, model="online")[
            np.arange(0, self.batch_size), action
        ]  # Q_online(s,a)
        return current_Q

    @torch.no_grad()
    def td_target(self, reward, next_state, done):
        """Compute the TD target `r + (1 - done) * gamma * Q_target(s', a*)`.

        `a*` is the action with the highest online-network value for `s'`.
        Runs without gradient tracking.

        Args:
            reward: Batch of rewards.
            next_state: Batch of next states with `batch_size` rows.
            done: Batch of episode-end flags; the bootstrap term is zeroed
                where it is true.

        Returns:
            Float tensor with one target value per transition.
        """
        next_state_Q = self.net(next_state, model="online")
        best_action = torch.argmax(next_state_Q, axis=1)
        next_Q = self.net(next_state, model="target")[
            np.arange(0, self.batch_size), best_action
        ]
        return (reward + (1 - done.float()) * self.gamma * next_Q).float()

    def update_Q_online(self, td_estimate, td_target):
        """Take one optimizer step on the loss between estimate and target.

        Returns:
            The loss value as a Python float.
        """
        loss = self.loss_fn(td_estimate, td_target)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return loss.item()

    def sync_Q_target(self):
        """Copy the online network's weights into the target network."""
        self.net.target.load_state_dict(self.net.online.state_dict())

    def save(self):
        """Write the network weights and exploration rate to a checkpoint file.

        The file index is `curr_step // save_every`, so calls made within the
        same `save_every` window overwrite the same file.
        """
        save_path = (
            self.save_dir / f"mario_net_{int(self.curr_step // self.save_every)}.chkpt"
        )
        torch.save(
            dict(model=self.net.state_dict(), exploration_rate=self.exploration_rate),
            save_path,
        )
        print(f"MarioNet saved to {save_path} at step {self.curr_step}")


    def learn(self):
        """Run the periodic sync/save hooks and, when due, one learning update.

        Returns:
            `(mean_td_estimate, loss)` when an update was performed, otherwise
            `(None, None)` (during burn-in or between `learn_every` steps).
        """
        if self.curr_step % self.sync_every == 0:
            self.sync_Q_target()

        if self.curr_step % self.save_every == 0:
            self.save()

        if self.curr_step < self.burnin:
            return None, None

        if self.curr_step % self.learn_every != 0:
            return None, None

        # Sample from memory
        state, next_state, action, reward, done = self.recall()

        # Get TD Estimate
        td_est = self.td_estimate(state, action)

        # Get TD Target
        td_tgt = self.td_target(reward, next_state, done)

        # Backpropagate loss through Q_online
        loss = self.update_Q_online(td_est, td_tgt)

        return (td_est.mean().item(), loss)

In [None]:
class MetricLogger:
    """Collect per-step and per-episode training metrics, log and plot them."""

    def __init__(self, save_dir):
        """Create the log file (with header) and initialise all metric stores.

        Args:
            save_dir: Directory (path-like supporting `/`) receiving the
                `log` file and the four plot images. An existing `log` file is
                overwritten.
        """
        self.save_log = save_dir / "log"
        with open(self.save_log, "w") as f:
            f.write(
                f"{'Episode':>8}{'Step':>8}{'Epsilon':>10}{'MeanReward':>15}"
                f"{'MeanLength':>15}{'MeanLoss':>15}{'MeanQValue':>15}"
                f"{'TimeDelta':>15}{'Time':>20}\n"
            )
        self.ep_rewards_plot = save_dir / "reward_plot.jpg"
        self.ep_lengths_plot = save_dir / "length_plot.jpg"
        self.ep_avg_losses_plot = save_dir / "loss_plot.jpg"
        self.ep_avg_qs_plot = save_dir / "q_plot.jpg"

        # History metrics
        self.ep_rewards = []
        self.ep_lengths = []
        self.ep_avg_losses = []
        self.ep_avg_qs = []

        # Moving averages, added for every call to record()
        self.moving_avg_ep_rewards = []
        self.moving_avg_ep_lengths = []
        self.moving_avg_ep_avg_losses = []
        self.moving_avg_ep_avg_qs = []

        # Current episode metric
        self.init_episode()

        # Timing
        self.record_time = time.time()

    def log_step(self, reward, loss, q):
        """Accumulate the metrics of one environment step.

        Args:
            reward: Reward of the step; always added to the episode total.
            loss: Loss of the learning update, or `None` if none happened.
            q: Mean Q value of the learning update; only read when `loss` is
                not `None`.
        """
        self.curr_ep_reward += reward
        self.curr_ep_length += 1
        # Compare against None explicitly: a loss of exactly 0.0 is a valid
        # update and must still count towards the episode averages.
        if loss is not None:
            self.curr_ep_loss += loss
            self.curr_ep_q += q
            self.curr_ep_loss_length += 1

    def log_episode(self):
        """Mark end of episode: store its totals and averages, then reset."""
        self.ep_rewards.append(self.curr_ep_reward)
        self.ep_lengths.append(self.curr_ep_length)
        if self.curr_ep_loss_length == 0:
            # No learning update happened during this episode.
            ep_avg_loss = 0
            ep_avg_q = 0
        else:
            ep_avg_loss = np.round(self.curr_ep_loss / self.curr_ep_loss_length, 5)
            ep_avg_q = np.round(self.curr_ep_q / self.curr_ep_loss_length, 5)
        self.ep_avg_losses.append(ep_avg_loss)
        self.ep_avg_qs.append(ep_avg_q)

        self.init_episode()

    def init_episode(self):
        """Reset the accumulators of the current episode."""
        self.curr_ep_reward = 0.0
        self.curr_ep_length = 0
        self.curr_ep_loss = 0.0
        self.curr_ep_q = 0.0
        self.curr_ep_loss_length = 0

    def record(self, episode, epsilon, step):
        """Print, append to the log file and plot the latest moving averages.

        Averages are taken over the last 100 logged episodes.

        Args:
            episode: Index of the current episode (int).
            epsilon: Current exploration rate.
            step: Total number of agent steps so far (int).
        """
        mean_ep_reward = np.round(np.mean(self.ep_rewards[-100:]), 3)
        mean_ep_length = np.round(np.mean(self.ep_lengths[-100:]), 3)
        mean_ep_loss = np.round(np.mean(self.ep_avg_losses[-100:]), 3)
        mean_ep_q = np.round(np.mean(self.ep_avg_qs[-100:]), 3)
        self.moving_avg_ep_rewards.append(mean_ep_reward)
        self.moving_avg_ep_lengths.append(mean_ep_length)
        self.moving_avg_ep_avg_losses.append(mean_ep_loss)
        self.moving_avg_ep_avg_qs.append(mean_ep_q)

        # Wall-clock time elapsed since the previous record() call
        # (or since construction for the first call).
        last_record_time = self.record_time
        self.record_time = time.time()
        time_since_last_record = np.round(self.record_time - last_record_time, 3)

        print(
            f"Episode {episode} - "
            f"Step {step} - "
            f"Epsilon {epsilon} - "
            f"Mean Reward {mean_ep_reward} - "
            f"Mean Length {mean_ep_length} - "
            f"Mean Loss {mean_ep_loss} - "
            f"Mean Q Value {mean_ep_q} - "
            f"Time Delta {time_since_last_record} - "
            f"Time {datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S')}"
        )

        with open(self.save_log, "a") as f:
            f.write(
                f"{episode:8d}{step:8d}{epsilon:10.3f}"
                f"{mean_ep_reward:15.3f}{mean_ep_length:15.3f}{mean_ep_loss:15.3f}{mean_ep_q:15.3f}"
                f"{time_since_last_record:15.3f}"
                f"{datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S'):>20}\n"
            )

        # One image per metric; each is redrawn from the full moving-average
        # history and overwrites the previous file.
        for metric in ["ep_lengths", "ep_avg_losses", "ep_avg_qs", "ep_rewards"]:
            plt.clf()
            plt.plot(getattr(self, f"moving_avg_{metric}"), label=f"moving_avg_{metric}")
            plt.legend()
            plt.savefig(getattr(self, f"{metric}_plot"))

In [None]:
# Initialize Super Mario environment (in v0.26 change render mode to 'human' to see results on the screen)
# Compare the version numerically: a plain string comparison is lexicographic
# and misorders versions such as '0.9' vs '0.26'.
gym_major_minor = tuple(int(part) for part in gym.__version__.split(".")[:2])
gym_is_pre_0_26 = gym_major_minor < (0, 26)

if gym_is_pre_0_26:
    env = gym_super_mario_bros.make("SuperMarioBros-1-1-v0", new_step_api=True)
else:
    env = gym_super_mario_bros.make("SuperMarioBros-1-1-v0", render_mode='rgb_array', apply_api_compatibility=True)

# Limit the action-space to
#   0. walk right
#   1. jump right
env = JoypadSpace(env, [["right"], ["right", "A"]])

# Sanity check: take one step on the raw environment and show what it returns.
env.reset()
next_state, reward, done, trunc, info = env.step(action=0)
print(f"{next_state.shape},\n {reward},\n {done},\n {info}")

# Apply Wrappers to environment
env = SkipFrame(env, skip=4)
env = GrayScaleObservation(env)
env = ResizeObservation(env, shape=84)
if gym_is_pre_0_26:
    env = FrameStack(env, num_stack=4, new_step_api=True)
else:
    env = FrameStack(env, num_stack=4)


use_cuda = torch.cuda.is_available()
print(f"Using CUDA: {use_cuda}")
print()

save_dir = Path("checkpoints2")
# exist_ok so the cell survives "Restart & Run All" once the directory exists.
# Note that MetricLogger truncates the existing log file on construction.
save_dir.mkdir(parents=True, exist_ok=True)

mario = Mario(state_dim=(4, 84, 84), action_dim=env.action_space.n, save_dir=save_dir)

logger = MetricLogger(save_dir)
episodes = 40000
with tqdm(total=episodes) as pbar:
    for e in range(episodes):

        state = env.reset()

        # Play the game!
        while True:

            # Run agent on the state
            action = mario.act(state)

            # Agent performs action
            next_state, reward, done, trunc, info = env.step(action)

            # Remember
            mario.cache(state, next_state, action, reward, done)

            # Learn
            q, loss = mario.learn()

            # Logging
            logger.log_step(reward, loss, q)

            # Update state
            state = next_state

            # Check if end of game: terminated, truncated, or flag reached.
            # A truncated episode must also be reset before stepping again.
            if done or trunc or info["flag_get"]:
                break
        logger.log_episode()

        # Report moving averages every 20 episodes.
        if e % 20 == 0:
            logger.record(episode=e, epsilon=mario.exploration_rate, step=mario.curr_step)
        # Extra checkpoint every 100 episodes, on top of the step-based one in Mario.learn().
        if e % 100 == 0 and e != 0:
            mario.save()

        pbar.update(1)
mario.save()
gc.collect()

  logger.warn(
  deprecation(
  logger.deprecation(


(240, 256, 3),
 0.0,
 False,
 {'coins': 0, 'flag_get': False, 'life': 2, 'score': 0, 'stage': 1, 'status': 'small', 'time': 400, 'world': 1, 'x_pos': 40, 'y_pos': 79}
Using CUDA: True



  0%|          | 0/40000 [00:00<?, ?it/s]

Episode 0 - Step 328 - Epsilon 0.9999180033516473 - Mean Reward 669.0 - Mean Length 328.0 - Mean Loss 0.0 - Mean Q Value 0.0 - Time Delta 3.777 - Time 2023-11-03T10:20:22


  0%|          | 20/40000 [00:52<21:32:07,  1.94s/it]

Episode 20 - Step 3995 - Epsilon 0.999001748459922 - Mean Reward 672.0 - Mean Length 190.238 - Mean Loss 0.0 - Mean Q Value 0.0 - Time Delta 49.809 - Time 2023-11-03T10:21:12


  0%|          | 40/40000 [01:50<53:10:56,  4.79s/it]

Episode 40 - Step 9850 - Epsilon 0.9975405291585261 - Mean Reward 740.854 - Mean Length 240.244 - Mean Loss 0.0 - Mean Q Value 0.0 - Time Delta 68.934 - Time 2023-11-03T10:22:21


  0%|          | 60/40000 [03:19<47:57:30,  4.32s/it]

Episode 60 - Step 15089 - Epsilon 0.99623485552674 - Mean Reward 736.984 - Mean Length 247.361 - Mean Loss 0.259 - Mean Q Value 0.879 - Time Delta 77.203 - Time 2023-11-03T10:23:38


  0%|          | 80/40000 [04:13<27:30:06,  2.48s/it]

Episode 80 - Step 18898 - Epsilon 0.9952866423065768 - Mean Reward 707.519 - Mean Length 233.309 - Mean Loss 0.308 - Mean Q Value 1.431 - Time Delta 55.493 - Time 2023-11-03T10:24:33


  0%|          | 100/40000 [05:09<34:18:54,  3.10s/it]

Episode 100 - Step 22750 - Epsilon 0.9943286425010921 - Mean Reward 703.29 - Mean Length 224.22 - Mean Loss 0.355 - Mean Q Value 2.138 - Time Delta 56.16 - Time 2023-11-03T10:25:30


  0%|          | 101/40000 [05:11<30:41:05,  2.77s/it]

MarioNet saved to checkpoints2/mario_net_0.chkpt at step 22750


  0%|          | 120/40000 [06:13<43:25:55,  3.92s/it]

Episode 120 - Step 27225 - Epsilon 0.9932168592129016 - Mean Reward 687.93 - Mean Length 232.3 - Mean Loss 0.453 - Mean Q Value 3.326 - Time Delta 65.789 - Time 2023-11-03T10:26:35


  0%|          | 140/40000 [06:54<30:22:56,  2.74s/it]

Episode 140 - Step 30088 - Epsilon 0.9925062185078727 - Mean Reward 638.66 - Mean Length 202.38 - Mean Loss 0.544 - Mean Q Value 4.508 - Time Delta 41.481 - Time 2023-11-03T10:27:17


  0%|          | 160/40000 [07:57<37:08:08,  3.36s/it]

Episode 160 - Step 34273 - Epsilon 0.9914683517755868 - Mean Reward 619.49 - Mean Length 191.84 - Mean Loss 0.491 - Mean Q Value 5.703 - Time Delta 60.901 - Time 2023-11-03T10:28:18


  0%|          | 180/40000 [09:01<31:23:59,  2.84s/it]

Episode 180 - Step 38682 - Epsilon 0.9903761077217436 - Mean Reward 630.26 - Mean Length 197.84 - Mean Loss 0.497 - Mean Q Value 6.808 - Time Delta 64.309 - Time 2023-11-03T10:29:22


  0%|          | 200/40000 [10:09<27:24:40,  2.48s/it]

Episode 200 - Step 43773 - Epsilon 0.9891164081843401 - Mean Reward 630.18 - Mean Length 210.23 - Mean Loss 0.512 - Mean Q Value 7.917 - Time Delta 74.071 - Time 2023-11-03T10:30:36


  1%|          | 201/40000 [10:18<48:37:50,  4.40s/it]

MarioNet saved to checkpoints2/mario_net_0.chkpt at step 43773


  1%|          | 220/40000 [11:55<39:10:02,  3.54s/it]

Episode 220 - Step 50909 - Epsilon 0.9873533973647591 - Mean Reward 670.81 - Mean Length 236.84 - Mean Loss 0.537 - Mean Q Value 8.989 - Time Delta 103.02 - Time 2023-11-03T10:32:19


  1%|          | 240/40000 [12:55<31:30:15,  2.85s/it]

Episode 240 - Step 54723 - Epsilon 0.9864124044722699 - Mean Reward 675.69 - Mean Length 246.35 - Mean Loss 0.576 - Mean Q Value 10.322 - Time Delta 55.649 - Time 2023-11-03T10:33:15


  1%|          | 260/40000 [13:58<16:32:41,  1.50s/it]

Episode 260 - Step 59101 - Epsilon 0.9853333665717517 - Mean Reward 695.12 - Mean Length 248.28 - Mean Loss 0.595 - Mean Q Value 11.135 - Time Delta 63.834 - Time 2023-11-03T10:34:19


  1%|          | 280/40000 [14:28<15:00:36,  1.36s/it]

Episode 280 - Step 61192 - Epsilon 0.9848184180963209 - Mean Reward 649.18 - Mean Length 225.1 - Mean Loss 0.648 - Mean Q Value 12.203 - Time Delta 30.674 - Time 2023-11-03T10:34:49


  1%|          | 300/40000 [15:30<47:31:08,  4.31s/it]

Episode 300 - Step 66072 - Epsilon 0.9836176720799872 - Mean Reward 626.29 - Mean Length 222.99 - Mean Loss 0.68 - Mean Q Value 13.024 - Time Delta 70.969 - Time 2023-11-03T10:36:00


  1%|          | 301/40000 [15:42<72:29:44,  6.57s/it]

MarioNet saved to checkpoints2/mario_net_0.chkpt at step 66072


  1%|          | 320/40000 [16:27<19:35:08,  1.78s/it]

Episode 320 - Step 69579 - Epsilon 0.9827556631165189 - Mean Reward 582.89 - Mean Length 186.7 - Mean Loss 0.685 - Mean Q Value 13.642 - Time Delta 51.255 - Time 2023-11-03T10:36:52


  1%|          | 340/40000 [17:19<31:11:31,  2.83s/it]

Episode 340 - Step 72785 - Epsilon 0.9819682999319477 - Mean Reward 580.59 - Mean Length 180.62 - Mean Loss 0.72 - Mean Q Value 14.278 - Time Delta 46.719 - Time 2023-11-03T10:37:38


  1%|          | 360/40000 [18:32<46:07:24,  4.19s/it]

Episode 360 - Step 78010 - Epsilon 0.9806864410760837 - Mean Reward 601.06 - Mean Length 189.09 - Mean Loss 0.755 - Mean Q Value 15.045 - Time Delta 74.724 - Time 2023-11-03T10:38:53


  1%|          | 380/40000 [19:43<19:30:03,  1.77s/it]

Episode 380 - Step 83019 - Epsilon 0.9794591449278431 - Mean Reward 643.06 - Mean Length 218.27 - Mean Loss 0.773 - Mean Q Value 15.715 - Time Delta 71.381 - Time 2023-11-03T10:40:04


  1%|          | 400/40000 [20:46<13:06:25,  1.19s/it]

Episode 400 - Step 87308 - Epsilon 0.9784094825791372 - Mean Reward 659.23 - Mean Length 212.36 - Mean Loss 0.781 - Mean Q Value 16.343 - Time Delta 61.509 - Time 2023-11-03T10:41:06


  1%|          | 401/40000 [20:48<16:36:03,  1.51s/it]

MarioNet saved to checkpoints2/mario_net_0.chkpt at step 87308


  1%|          | 420/40000 [21:25<14:30:49,  1.32s/it]

Episode 420 - Step 90110 - Epsilon 0.9777243466477292 - Mean Reward 663.17 - Mean Length 205.31 - Mean Loss 0.807 - Mean Q Value 16.998 - Time Delta 40.703 - Time 2023-11-03T10:41:47


  1%|          | 440/40000 [22:18<35:21:30,  3.22s/it]

Episode 440 - Step 93694 - Epsilon 0.9768486978727895 - Mean Reward 656.43 - Mean Length 209.09 - Mean Loss 0.806 - Mean Q Value 17.682 - Time Delta 51.947 - Time 2023-11-03T10:42:39


  1%|          | 460/40000 [23:29<47:31:21,  4.33s/it]

Episode 460 - Step 98769 - Epsilon 0.9756101068297076 - Mean Reward 637.31 - Mean Length 207.59 - Mean Loss 0.822 - Mean Q Value 18.188 - Time Delta 74.035 - Time 2023-11-03T10:43:53


  1%|          | 480/40000 [24:41<25:40:57,  2.34s/it]

Episode 480 - Step 103482 - Epsilon 0.9744612710183774 - Mean Reward 629.77 - Mean Length 204.63 - Mean Loss 0.824 - Mean Q Value 18.643 - Time Delta 68.437 - Time 2023-11-03T10:45:01


  1%|▏         | 500/40000 [25:46<70:45:48,  6.45s/it]

Episode 500 - Step 108207 - Epsilon 0.9733108680885264 - Mean Reward 627.19 - Mean Length 208.99 - Mean Loss 0.834 - Mean Q Value 19.078 - Time Delta 67.706 - Time 2023-11-03T10:46:09


  1%|▏         | 501/40000 [25:50<64:01:13,  5.83s/it]

MarioNet saved to checkpoints2/mario_net_0.chkpt at step 108207


  1%|▏         | 520/40000 [26:42<41:00:05,  3.74s/it]

Episode 520 - Step 112045 - Epsilon 0.9723774240851591 - Mean Reward 640.01 - Mean Length 219.35 - Mean Loss 0.848 - Mean Q Value 19.554 - Time Delta 54.955 - Time 2023-11-03T10:47:04


  1%|▏         | 540/40000 [27:22<18:33:06,  1.69s/it]

Episode 540 - Step 115030 - Epsilon 0.9716520580275388 - Mean Reward 647.92 - Mean Length 213.36 - Mean Loss 0.864 - Mean Q Value 20.002 - Time Delta 43.052 - Time 2023-11-03T10:47:47


  1%|▏         | 560/40000 [28:40<27:42:24,  2.53s/it]

Episode 560 - Step 120146 - Epsilon 0.9704101092854274 - Mean Reward 629.14 - Mean Length 213.77 - Mean Loss 0.87 - Mean Q Value 20.43 - Time Delta 72.701 - Time 2023-11-03T10:48:59


  1%|▏         | 580/40000 [29:48<39:56:17,  3.65s/it]

Episode 580 - Step 124846 - Epsilon 0.969270546888426 - Mean Reward 650.24 - Mean Length 213.64 - Mean Loss 0.892 - Mean Q Value 20.795 - Time Delta 67.531 - Time 2023-11-03T10:50:07


  2%|▏         | 600/40000 [30:49<35:17:19,  3.22s/it]

Episode 600 - Step 129097 - Epsilon 0.9682410016573804 - Mean Reward 655.32 - Mean Length 208.9 - Mean Loss 0.907 - Mean Q Value 21.164 - Time Delta 61.183 - Time 2023-11-03T10:51:08


  2%|▏         | 601/40000 [30:50<28:00:54,  2.56s/it]

MarioNet saved to checkpoints2/mario_net_0.chkpt at step 129097


  2%|▏         | 620/40000 [31:58<21:18:33,  1.95s/it]

Episode 620 - Step 133917 - Epsilon 0.9670749737773556 - Mean Reward 664.78 - Mean Length 218.72 - Mean Loss 0.939 - Mean Q Value 21.66 - Time Delta 69.107 - Time 2023-11-03T10:52:17


  2%|▏         | 640/40000 [32:53<23:13:50,  2.12s/it]

Episode 640 - Step 137772 - Epsilon 0.9661434041270286 - Mean Reward 677.71 - Mean Length 227.42 - Mean Loss 0.949 - Mean Q Value 21.993 - Time Delta 55.474 - Time 2023-11-03T10:53:13


  2%|▏         | 660/40000 [33:49<35:52:43,  3.28s/it]

Episode 660 - Step 141805 - Epsilon 0.9651697808283027 - Mean Reward 683.98 - Mean Length 216.59 - Mean Loss 0.981 - Mean Q Value 22.366 - Time Delta 57.976 - Time 2023-11-03T10:54:11


  2%|▏         | 680/40000 [35:11<77:01:22,  7.05s/it]

Episode 680 - Step 147637 - Epsilon 0.9637635884752127 - Mean Reward 696.4 - Mean Length 227.91 - Mean Loss 0.979 - Mean Q Value 22.777 - Time Delta 82.599 - Time 2023-11-03T10:55:33


  2%|▏         | 700/40000 [36:19<29:12:57,  2.68s/it]

Episode 700 - Step 152458 - Epsilon 0.9626027119786563 - Mean Reward 722.71 - Mean Length 233.61 - Mean Loss 1.002 - Mean Q Value 23.265 - Time Delta 68.703 - Time 2023-11-03T10:56:42


  2%|▏         | 701/40000 [36:24<37:27:26,  3.43s/it]

MarioNet saved to checkpoints2/mario_net_0.chkpt at step 152458


  2%|▏         | 720/40000 [37:27<36:26:25,  3.34s/it]

Episode 720 - Step 156883 - Epsilon 0.9615384213896145 - Mean Reward 720.43 - Mean Length 229.66 - Mean Loss 1.004 - Mean Q Value 23.666 - Time Delta 64.264 - Time 2023-11-03T10:57:46


  2%|▏         | 740/40000 [38:33<14:38:31,  1.34s/it]

Episode 740 - Step 161490 - Epsilon 0.9604316068834309 - Mean Reward 700.68 - Mean Length 237.18 - Mean Loss 1.015 - Mean Q Value 24.007 - Time Delta 66.727 - Time 2023-11-03T10:58:53


  2%|▏         | 760/40000 [39:21<22:08:44,  2.03s/it]

Episode 760 - Step 164921 - Epsilon 0.9596081497802302 - Mean Reward 685.42 - Mean Length 231.16 - Mean Loss 1.017 - Mean Q Value 24.301 - Time Delta 49.82 - Time 2023-11-03T10:59:43


  2%|▏         | 780/40000 [40:23<27:40:05,  2.54s/it]

Episode 780 - Step 169022 - Epsilon 0.9586248155694451 - Mean Reward 663.5 - Mean Length 213.85 - Mean Loss 1.047 - Mean Q Value 24.689 - Time Delta 59.506 - Time 2023-11-03T11:00:42


  2%|▏         | 800/40000 [41:33<48:52:30,  4.49s/it]

Episode 800 - Step 173889 - Epsilon 0.9574591180042976 - Mean Reward 660.1 - Mean Length 214.31 - Mean Loss 1.061 - Mean Q Value 24.926 - Time Delta 70.598 - Time 2023-11-03T11:01:53


  2%|▏         | 801/40000 [41:35<40:47:53,  3.75s/it]

MarioNet saved to checkpoints2/mario_net_0.chkpt at step 173889


  2%|▏         | 820/40000 [42:37<47:01:15,  4.32s/it]

Episode 820 - Step 179221 - Epsilon 0.956183675111938 - Mean Reward 656.45 - Mean Length 223.38 - Mean Loss 1.07 - Mean Q Value 24.966 - Time Delta 76.168 - Time 2023-11-03T11:03:09


  2%|▏         | 840/40000 [43:53<49:17:27,  4.53s/it]

Episode 840 - Step 183772 - Epsilon 0.9550963956429136 - Mean Reward 693.82 - Mean Length 222.82 - Mean Loss 1.102 - Mean Q Value 25.165 - Time Delta 65.321 - Time 2023-11-03T11:04:14


  2%|▏         | 860/40000 [44:58<28:31:25,  2.62s/it]

Episode 860 - Step 188136 - Epsilon 0.9540549535549013 - Mean Reward 693.96 - Mean Length 232.15 - Mean Loss 1.11 - Mean Q Value 25.397 - Time Delta 63.212 - Time 2023-11-03T11:05:18


  2%|▏         | 880/40000 [46:03<22:46:49,  2.10s/it]

Episode 880 - Step 193174 - Epsilon 0.9528540776010042 - Mean Reward 697.03 - Mean Length 241.52 - Mean Loss 1.121 - Mean Q Value 25.419 - Time Delta 72.772 - Time 2023-11-03T11:06:30


  2%|▏         | 900/40000 [46:53<15:53:28,  1.46s/it]

Episode 900 - Step 196642 - Epsilon 0.9520283110339238 - Mean Reward 659.27 - Mean Length 227.53 - Mean Loss 1.127 - Mean Q Value 25.493 - Time Delta 50.601 - Time 2023-11-03T11:07:21


  2%|▏         | 901/40000 [47:03<41:30:35,  3.82s/it]

MarioNet saved to checkpoints2/mario_net_0.chkpt at step 196642


  2%|▏         | 920/40000 [48:15<20:33:32,  1.89s/it]

Episode 920 - Step 202263 - Epsilon 0.950691412640778 - Mean Reward 692.0 - Mean Length 230.42 - Mean Loss 1.163 - Mean Q Value 25.78 - Time Delta 80.0 - Time 2023-11-03T11:08:41


  2%|▏         | 940/40000 [49:28<57:24:39,  5.29s/it]

Episode 940 - Step 206888 - Epsilon 0.9495928108090094 - Mean Reward 671.31 - Mean Length 231.16 - Mean Loss 1.168 - Mean Q Value 26.038 - Time Delta 67.34 - Time 2023-11-03T11:09:48


  2%|▏         | 960/40000 [50:28<24:59:46,  2.30s/it]

Episode 960 - Step 211589 - Epsilon 0.9484774572565376 - Mean Reward 701.36 - Mean Length 234.53 - Mean Loss 1.197 - Mean Q Value 26.263 - Time Delta 67.84 - Time 2023-11-03T11:10:56


  2%|▏         | 980/40000 [51:19<15:55:13,  1.47s/it]

Episode 980 - Step 214433 - Epsilon 0.9478033293809275 - Mean Reward 665.08 - Mean Length 212.59 - Mean Loss 1.211 - Mean Q Value 26.54 - Time Delta 41.833 - Time 2023-11-03T11:11:38


  2%|▎         | 1000/40000 [52:26<22:02:57,  2.04s/it]

Episode 1000 - Step 219638 - Epsilon 0.9465708022312237 - Mean Reward 684.82 - Mean Length 229.96 - Mean Loss 1.213 - Mean Q Value 26.821 - Time Delta 75.053 - Time 2023-11-03T11:12:53


  3%|▎         | 1001/40000 [52:35<45:31:01,  4.20s/it]

MarioNet saved to checkpoints2/mario_net_0.chkpt at step 219638


  3%|▎         | 1020/40000 [53:42<31:14:23,  2.89s/it]

Episode 1020 - Step 224571 - Epsilon 0.9454041631700536 - Mean Reward 654.0 - Mean Length 223.08 - Mean Loss 1.213 - Mean Q Value 27.106 - Time Delta 70.896 - Time 2023-11-03T11:14:04


  3%|▎         | 1040/40000 [54:45<42:35:03,  3.93s/it]

Episode 1040 - Step 228878 - Epsilon 0.9443867469597864 - Mean Reward 636.39 - Mean Length 219.9 - Mean Loss 1.208 - Mean Q Value 27.122 - Time Delta 60.634 - Time 2023-11-03T11:15:05


  3%|▎         | 1060/40000 [55:50<29:57:47,  2.77s/it]

Episode 1060 - Step 233575 - Epsilon 0.9432784515187889 - Mean Reward 639.0 - Mean Length 219.86 - Mean Loss 1.214 - Mean Q Value 27.263 - Time Delta 66.846 - Time 2023-11-03T11:16:11


  3%|▎         | 1080/40000 [56:27<22:41:42,  2.10s/it]

Episode 1080 - Step 236121 - Epsilon 0.9426782457450402 - Mean Reward 651.04 - Mean Length 216.88 - Mean Loss 1.226 - Mean Q Value 27.5 - Time Delta 36.514 - Time 2023-11-03T11:16:48


  3%|▎         | 1100/40000 [57:39<34:06:16,  3.16s/it]

Episode 1100 - Step 241038 - Epsilon 0.9415201702944886 - Mean Reward 631.93 - Mean Length 214.0 - Mean Loss 1.242 - Mean Q Value 27.738 - Time Delta 70.341 - Time 2023-11-03T11:17:58


  3%|▎         | 1101/40000 [57:40<27:43:27,  2.57s/it]

MarioNet saved to checkpoints2/mario_net_0.chkpt at step 241038


  3%|▎         | 1120/40000 [58:28<35:49:56,  3.32s/it]

Episode 1120 - Step 244907 - Epsilon 0.940609925084173 - Mean Reward 616.3 - Mean Length 203.36 - Mean Loss 1.237 - Mean Q Value 27.872 - Time Delta 55.602 - Time 2023-11-03T11:18:54


  3%|▎         | 1140/40000 [59:22<24:34:11,  2.28s/it]

Episode 1140 - Step 248280 - Epsilon 0.939817089991707 - Mean Reward 635.56 - Mean Length 194.02 - Mean Loss 1.256 - Mean Q Value 28.118 - Time Delta 48.188 - Time 2023-11-03T11:19:42


  3%|▎         | 1160/40000 [1:00:11<24:18:59,  2.25s/it]

Episode 1160 - Step 251867 - Epsilon 0.9389746866798762 - Mean Reward 620.79 - Mean Length 182.92 - Mean Loss 1.233 - Mean Q Value 28.429 - Time Delta 51.531 - Time 2023-11-03T11:20:34


  3%|▎         | 1180/40000 [1:01:17<32:05:46,  2.98s/it]

Episode 1180 - Step 256401 - Epsilon 0.9379109617194246 - Mean Reward 645.62 - Mean Length 202.8 - Mean Loss 1.212 - Mean Q Value 28.58 - Time Delta 65.867 - Time 2023-11-03T11:21:39


  3%|▎         | 1200/40000 [1:02:28<51:17:48,  4.76s/it]

Episode 1200 - Step 261426 - Epsilon 0.9367334507053771 - Mean Reward 661.52 - Mean Length 203.88 - Mean Loss 1.202 - Mean Q Value 28.715 - Time Delta 71.654 - Time 2023-11-03T11:22:51


  3%|▎         | 1201/40000 [1:02:33<53:01:02,  4.92s/it]

MarioNet saved to checkpoints2/mario_net_0.chkpt at step 261426


  3%|▎         | 1220/40000 [1:03:32<41:23:23,  3.84s/it]

Episode 1220 - Step 266114 - Epsilon 0.9356362420537045 - Mean Reward 674.97 - Mean Length 212.07 - Mean Loss 1.171 - Mean Q Value 28.952 - Time Delta 66.602 - Time 2023-11-03T11:23:58


  3%|▎         | 1240/40000 [1:04:31<30:05:13,  2.79s/it]

Episode 1240 - Step 269819 - Epsilon 0.9347700101117026 - Mean Reward 665.89 - Mean Length 215.39 - Mean Loss 1.141 - Mean Q Value 29.199 - Time Delta 53.419 - Time 2023-11-03T11:24:51


  3%|▎         | 1260/40000 [1:05:55<59:36:39,  5.54s/it]

Episode 1260 - Step 275840 - Epsilon 0.933364005837477 - Mean Reward 676.19 - Mean Length 239.73 - Mean Loss 1.123 - Mean Q Value 29.142 - Time Delta 86.817 - Time 2023-11-03T11:26:18


  3%|▎         | 1280/40000 [1:06:45<42:28:49,  3.95s/it]

Episode 1280 - Step 279053 - Epsilon 0.9326145821335962 - Mean Reward 652.43 - Mean Length 226.52 - Mean Loss 1.104 - Mean Q Value 29.202 - Time Delta 46.809 - Time 2023-11-03T11:27:05


  3%|▎         | 1300/40000 [1:07:35<36:18:08,  3.38s/it]

Episode 1300 - Step 282787 - Epsilon 0.9317443925364641 - Mean Reward 655.61 - Mean Length 213.61 - Mean Loss 1.083 - Mean Q Value 29.285 - Time Delta 53.621 - Time 2023-11-03T11:27:58


  3%|▎         | 1301/40000 [1:07:40<42:22:37,  3.94s/it]

MarioNet saved to checkpoints2/mario_net_0.chkpt at step 282787


  3%|▎         | 1320/40000 [1:08:21<25:26:53,  2.37s/it]

Episode 1320 - Step 285843 - Epsilon 0.9310328115900531 - Mean Reward 641.75 - Mean Length 197.29 - Mean Loss 1.087 - Mean Q Value 29.38 - Time Delta 44.304 - Time 2023-11-03T11:28:43


  3%|▎         | 1340/40000 [1:09:42<37:12:30,  3.46s/it]

Episode 1340 - Step 291332 - Epsilon 0.9297560778538626 - Mean Reward 662.93 - Mean Length 215.13 - Mean Loss 1.084 - Mean Q Value 29.414 - Time Delta 78.951 - Time 2023-11-03T11:30:02


  3%|▎         | 1360/40000 [1:10:28<22:11:42,  2.07s/it]

Episode 1360 - Step 294664 - Epsilon 0.9289819134283753 - Mean Reward 639.1 - Mean Length 188.24 - Mean Loss 1.082 - Mean Q Value 29.612 - Time Delta 48.111 - Time 2023-11-03T11:30:50


  3%|▎         | 1380/40000 [1:11:44<43:21:58,  4.04s/it]

Episode 1380 - Step 300431 - Epsilon 0.9276435186343877 - Mean Reward 660.29 - Mean Length 213.78 - Mean Loss 1.081 - Mean Q Value 29.636 - Time Delta 82.152 - Time 2023-11-03T11:32:12


  4%|▎         | 1400/40000 [1:12:50<20:04:56,  1.87s/it]

Episode 1400 - Step 304400 - Epsilon 0.9267235207473289 - Mean Reward 643.74 - Mean Length 216.13 - Mean Loss 1.086 - Mean Q Value 29.7 - Time Delta 57.451 - Time 2023-11-03T11:33:09


  4%|▎         | 1401/40000 [1:12:51<17:23:46,  1.62s/it]

MarioNet saved to checkpoints2/mario_net_0.chkpt at step 304400


  4%|▎         | 1420/40000 [1:13:40<21:05:33,  1.97s/it]

Episode 1420 - Step 308072 - Epsilon 0.9258731788155353 - Mean Reward 652.27 - Mean Length 222.29 - Mean Loss 1.083 - Mean Q Value 29.653 - Time Delta 53.099 - Time 2023-11-03T11:34:02


  4%|▎         | 1440/40000 [1:14:44<42:26:39,  3.96s/it]

Episode 1440 - Step 312275 - Epsilon 0.9249008283892775 - Mean Reward 639.83 - Mean Length 209.43 - Mean Loss 1.074 - Mean Q Value 29.736 - Time Delta 61.57 - Time 2023-11-03T11:35:04


  4%|▎         | 1460/40000 [1:15:36<18:24:50,  1.72s/it]

Episode 1460 - Step 316181 - Epsilon 0.9239981034445738 - Mean Reward 651.28 - Mean Length 215.17 - Mean Loss 1.069 - Mean Q Value 29.901 - Time Delta 56.514 - Time 2023-11-03T11:36:01


  4%|▎         | 1480/40000 [1:16:47<21:06:40,  1.97s/it]

Episode 1480 - Step 320869 - Epsilon 0.9229158118786441 - Mean Reward 666.13 - Mean Length 204.38 - Mean Loss 1.059 - Mean Q Value 30.121 - Time Delta 67.911 - Time 2023-11-03T11:37:08


  4%|▍         | 1500/40000 [1:17:38<15:02:01,  1.41s/it]

Episode 1500 - Step 324255 - Epsilon 0.9221348941162387 - Mean Reward 653.55 - Mean Length 198.55 - Mean Loss 1.035 - Mean Q Value 30.334 - Time Delta 48.961 - Time 2023-11-03T11:37:57


  4%|▍         | 1501/40000 [1:17:39<14:03:08,  1.31s/it]

MarioNet saved to checkpoints2/mario_net_0.chkpt at step 324255


  4%|▍         | 1520/40000 [1:18:40<35:17:26,  3.30s/it]

Episode 1520 - Step 328714 - Epsilon 0.9211075168552517 - Mean Reward 668.92 - Mean Length 206.42 - Mean Loss 1.034 - Mean Q Value 30.51 - Time Delta 64.824 - Time 2023-11-03T11:39:02


  4%|▍         | 1540/40000 [1:19:25<19:06:32,  1.79s/it]

Episode 1540 - Step 331727 - Epsilon 0.9204139537773746 - Mean Reward 655.39 - Mean Length 194.52 - Mean Loss 1.042 - Mean Q Value 30.681 - Time Delta 44.214 - Time 2023-11-03T11:39:46


  4%|▍         | 1560/40000 [1:20:20<29:11:05,  2.73s/it]

Episode 1560 - Step 335726 - Epsilon 0.9194942296357017 - Mean Reward 636.12 - Mean Length 195.45 - Mean Loss 1.048 - Mean Q Value 30.75 - Time Delta 58.372 - Time 2023-11-03T11:40:45


  4%|▍         | 1580/40000 [1:21:09<40:56:49,  3.84s/it]

Episode 1580 - Step 338921 - Epsilon 0.9187600767692427 - Mean Reward 603.45 - Mean Length 180.52 - Mean Loss 1.046 - Mean Q Value 30.848 - Time Delta 45.703 - Time 2023-11-03T11:41:31


  4%|▍         | 1600/40000 [1:22:04<28:34:56,  2.68s/it]

Episode 1600 - Step 342610 - Epsilon 0.917913140785838 - Mean Reward 618.68 - Mean Length 183.55 - Mean Loss 1.067 - Mean Q Value 30.908 - Time Delta 52.706 - Time 2023-11-03T11:42:23


  4%|▍         | 1601/40000 [1:22:05<23:16:01,  2.18s/it]

MarioNet saved to checkpoints2/mario_net_0.chkpt at step 342610


  4%|▍         | 1620/40000 [1:23:15<47:52:29,  4.49s/it]

Episode 1620 - Step 347634 - Epsilon 0.9167609654544016 - Mean Reward 633.66 - Mean Length 189.2 - Mean Loss 1.083 - Mean Q Value 31.103 - Time Delta 71.947 - Time 2023-11-03T11:43:35


  4%|▍         | 1640/40000 [1:24:16<23:58:05,  2.25s/it]

Episode 1640 - Step 351915 - Epsilon 0.9157803267662875 - Mean Reward 649.03 - Mean Length 201.88 - Mean Loss 1.093 - Mean Q Value 31.119 - Time Delta 61.489 - Time 2023-11-03T11:44:37


  4%|▍         | 1660/40000 [1:25:02<27:08:23,  2.55s/it]

Episode 1660 - Step 355171 - Epsilon 0.9150351848009776 - Mean Reward 641.58 - Mean Length 194.45 - Mean Loss 1.105 - Mean Q Value 31.151 - Time Delta 47.13 - Time 2023-11-03T11:45:24


  4%|▍         | 1680/40000 [1:25:58<18:18:37,  1.72s/it]

Episode 1680 - Step 359088 - Epsilon 0.9141395750686694 - Mean Reward 659.91 - Mean Length 201.67 - Mean Loss 1.114 - Mean Q Value 31.282 - Time Delta 56.279 - Time 2023-11-03T11:46:20


  4%|▍         | 1700/40000 [1:27:02<67:43:48,  6.37s/it]

Episode 1700 - Step 363924 - Epsilon 0.9130350480053429 - Mean Reward 668.74 - Mean Length 213.14 - Mean Loss 1.135 - Mean Q Value 31.407 - Time Delta 69.537 - Time 2023-11-03T11:47:30


  4%|▍         | 1701/40000 [1:27:11<75:45:16,  7.12s/it]

MarioNet saved to checkpoints2/mario_net_0.chkpt at step 363924


  4%|▍         | 1720/40000 [1:28:16<48:26:53,  4.56s/it]

Episode 1720 - Step 368797 - Epsilon 0.9119234201767867 - Mean Reward 672.67 - Mean Length 211.63 - Mean Loss 1.135 - Mean Q Value 31.51 - Time Delta 70.739 - Time 2023-11-03T11:48:40


  4%|▍         | 1740/40000 [1:29:12<22:33:12,  2.12s/it]

Episode 1740 - Step 372458 - Epsilon 0.9110891639967953 - Mean Reward 673.68 - Mean Length 205.43 - Mean Loss 1.134 - Mean Q Value 31.671 - Time Delta 52.772 - Time 2023-11-03T11:49:33


  4%|▍         | 1760/40000 [1:30:34<53:59:50,  5.08s/it]

Episode 1760 - Step 378530 - Epsilon 0.909707179664395 - Mean Reward 718.49 - Mean Length 233.59 - Mean Loss 1.13 - Mean Q Value 31.846 - Time Delta 87.939 - Time 2023-11-03T11:51:01


  4%|▍         | 1780/40000 [1:31:38<27:04:03,  2.55s/it]

Episode 1780 - Step 382494 - Epsilon 0.9088061062921896 - Mean Reward 720.83 - Mean Length 234.06 - Mean Loss 1.143 - Mean Q Value 31.832 - Time Delta 57.574 - Time 2023-11-03T11:51:59


  4%|▍         | 1800/40000 [1:32:43<37:43:26,  3.56s/it]

Episode 1800 - Step 387059 - Epsilon 0.9077695228066258 - Mean Reward 720.67 - Mean Length 231.35 - Mean Loss 1.141 - Mean Q Value 31.769 - Time Delta 65.871 - Time 2023-11-03T11:53:05


  5%|▍         | 1801/40000 [1:32:46<35:40:38,  3.36s/it]

MarioNet saved to checkpoints2/mario_net_0.chkpt at step 387059


  5%|▍         | 1820/40000 [1:33:39<29:51:20,  2.82s/it]

Episode 1820 - Step 391210 - Epsilon 0.906827973497406 - Mean Reward 685.66 - Mean Length 224.13 - Mean Loss 1.149 - Mean Q Value 31.616 - Time Delta 60.266 - Time 2023-11-03T11:54:05


  5%|▍         | 1840/40000 [1:34:44<38:32:38,  3.64s/it]

Episode 1840 - Step 395311 - Epsilon 0.9058987244389879 - Mean Reward 685.15 - Mean Length 228.53 - Mean Loss 1.141 - Mean Q Value 31.556 - Time Delta 59.544 - Time 2023-11-03T11:55:04


  5%|▍         | 1860/40000 [1:35:37<19:21:41,  1.83s/it]

Episode 1860 - Step 398909 - Epsilon 0.9050842348060585 - Mean Reward 647.97 - Mean Length 203.79 - Mean Loss 1.133 - Mean Q Value 31.436 - Time Delta 52.328 - Time 2023-11-03T11:55:57


  5%|▍         | 1880/40000 [1:36:40<28:11:15,  2.66s/it]

Episode 1880 - Step 403288 - Epsilon 0.9040939358798598 - Mean Reward 628.05 - Mean Length 207.94 - Mean Loss 1.128 - Mean Q Value 31.478 - Time Delta 63.733 - Time 2023-11-03T11:57:00


  5%|▍         | 1900/40000 [1:37:21<13:28:07,  1.27s/it]

Episode 1900 - Step 406046 - Epsilon 0.9034707778914932 - Mean Reward 598.82 - Mean Length 189.87 - Mean Loss 1.103 - Mean Q Value 31.574 - Time Delta 40.372 - Time 2023-11-03T11:57:41


  5%|▍         | 1901/40000 [1:37:22<12:43:01,  1.20s/it]

MarioNet saved to checkpoints2/mario_net_0.chkpt at step 406046


  5%|▍         | 1920/40000 [1:38:15<31:09:37,  2.95s/it]

Episode 1920 - Step 410304 - Epsilon 0.9025095448352026 - Mean Reward 601.36 - Mean Length 190.94 - Mean Loss 1.072 - Mean Q Value 31.685 - Time Delta 61.643 - Time 2023-11-03T11:58:42


  5%|▍         | 1940/40000 [1:39:08<22:10:13,  2.10s/it]

Episode 1940 - Step 413580 - Epsilon 0.9017706920270495 - Mean Reward 596.44 - Mean Length 182.69 - Mean Loss 1.074 - Mean Q Value 31.652 - Time Delta 47.399 - Time 2023-11-03T11:59:30


  5%|▍         | 1960/40000 [1:40:04<29:19:27,  2.78s/it]

Episode 1960 - Step 417585 - Epsilon 0.9008682458712237 - Mean Reward 606.29 - Mean Length 186.76 - Mean Loss 1.07 - Mean Q Value 31.87 - Time Delta 57.769 - Time 2023-11-03T12:00:28


  5%|▍         | 1980/40000 [1:41:34<46:33:30,  4.41s/it]

Episode 1980 - Step 423679 - Epsilon 0.8994968178775576 - Mean Reward 648.64 - Mean Length 203.91 - Mean Loss 1.062 - Mean Q Value 31.904 - Time Delta 87.585 - Time 2023-11-03T12:01:55


  5%|▌         | 2000/40000 [1:42:28<35:41:35,  3.38s/it]

Episode 2000 - Step 427467 - Epsilon 0.8986453974956651 - Mean Reward 679.74 - Mean Length 214.21 - Mean Loss 1.069 - Mean Q Value 31.806 - Time Delta 54.824 - Time 2023-11-03T12:02:50


  5%|▌         | 2001/40000 [1:42:32<37:55:23,  3.59s/it]

MarioNet saved to checkpoints2/mario_net_0.chkpt at step 427467


  5%|▌         | 2020/40000 [1:43:40<58:54:55,  5.58s/it]

Episode 2020 - Step 432468 - Epsilon 0.897522568001985 - Mean Reward 688.33 - Mean Length 221.64 - Mean Loss 1.087 - Mean Q Value 31.741 - Time Delta 72.652 - Time 2023-11-03T12:04:03


  5%|▌         | 2040/40000 [1:45:04<46:43:32,  4.43s/it]

Episode 2040 - Step 438164 - Epsilon 0.896245405261017 - Mean Reward 740.39 - Mean Length 245.84 - Mean Loss 1.088 - Mean Q Value 31.703 - Time Delta 81.71 - Time 2023-11-03T12:05:24


  5%|▌         | 2060/40000 [1:46:12<35:50:36,  3.40s/it]

Episode 2060 - Step 442979 - Epsilon 0.8951671987952154 - Mean Reward 766.6 - Mean Length 253.94 - Mean Loss 1.103 - Mean Q Value 31.36 - Time Delta 69.824 - Time 2023-11-03T12:06:34


  5%|▌         | 2080/40000 [1:47:26<39:42:46,  3.77s/it]

Episode 2080 - Step 448927 - Epsilon 0.8938370741968882 - Mean Reward 750.17 - Mean Length 252.48 - Mean Loss 1.12 - Mean Q Value 31.146 - Time Delta 85.722 - Time 2023-11-03T12:08:00


  5%|▌         | 2100/40000 [1:49:13<43:40:51,  4.15s/it]

Episode 2100 - Step 455366 - Epsilon 0.892399377263561 - Mean Reward 772.85 - Mean Length 278.99 - Mean Loss 1.141 - Mean Q Value 30.946 - Time Delta 92.449 - Time 2023-11-03T12:09:32


  5%|▌         | 2101/40000 [1:49:14<34:21:15,  3.26s/it]

MarioNet saved to checkpoints2/mario_net_0.chkpt at step 455366


  5%|▌         | 2120/40000 [1:50:19<28:02:35,  2.67s/it]

Episode 2120 - Step 460293 - Epsilon 0.8913008408931268 - Mean Reward 779.66 - Mean Length 278.25 - Mean Loss 1.147 - Mean Q Value 30.62 - Time Delta 70.735 - Time 2023-11-03T12:10:43


  5%|▌         | 2140/40000 [1:51:33<51:47:35,  4.92s/it]

Episode 2140 - Step 465102 - Epsilon 0.8902299182105394 - Mean Reward 748.92 - Mean Length 269.38 - Mean Loss 1.15 - Mean Q Value 30.295 - Time Delta 69.598 - Time 2023-11-03T12:11:53


  5%|▌         | 2160/40000 [1:53:06<44:20:19,  4.22s/it]

Episode 2160 - Step 471609 - Epsilon 0.8887829137863376 - Mean Reward 765.11 - Mean Length 286.3 - Mean Loss 1.144 - Mean Q Value 30.005 - Time Delta 93.217 - Time 2023-11-03T12:13:26


  5%|▌         | 2180/40000 [1:53:56<31:18:32,  2.98s/it]

Episode 2180 - Step 475096 - Epsilon 0.8880084548006841 - Mean Reward 737.56 - Mean Length 261.69 - Mean Loss 1.132 - Mean Q Value 29.722 - Time Delta 50.393 - Time 2023-11-03T12:14:16


  6%|▌         | 2200/40000 [1:54:49<26:12:23,  2.50s/it]

Episode 2200 - Step 478809 - Epsilon 0.8871845433064939 - Mean Reward 708.92 - Mean Length 234.43 - Mean Loss 1.118 - Mean Q Value 29.579 - Time Delta 53.831 - Time 2023-11-03T12:15:10


  6%|▌         | 2201/40000 [1:54:52<27:05:55,  2.58s/it]

MarioNet saved to checkpoints2/mario_net_0.chkpt at step 478809


  6%|▌         | 2220/40000 [1:55:47<26:37:13,  2.54s/it]

Episode 2220 - Step 482970 - Epsilon 0.8862621293236443 - Mean Reward 707.32 - Mean Length 226.77 - Mean Loss 1.098 - Mean Q Value 29.57 - Time Delta 61.109 - Time 2023-11-03T12:16:11


  6%|▌         | 2240/40000 [1:56:36<24:23:10,  2.32s/it]

Episode 2240 - Step 486243 - Epsilon 0.8855372418556438 - Mean Reward 686.87 - Mean Length 211.41 - Mean Loss 1.086 - Mean Q Value 29.621 - Time Delta 47.423 - Time 2023-11-03T12:16:59


  6%|▌         | 2260/40000 [1:57:18<30:17:02,  2.89s/it]

Episode 2260 - Step 489238 - Epsilon 0.8848744439288012 - Mean Reward 622.99 - Mean Length 176.29 - Mean Loss 1.083 - Mean Q Value 29.842 - Time Delta 43.494 - Time 2023-11-03T12:17:42


  6%|▌         | 2280/40000 [1:58:42<54:03:57,  5.16s/it]

Episode 2280 - Step 494712 - Epsilon 0.8836643213161056 - Mean Reward 658.54 - Mean Length 196.16 - Mean Loss 1.068 - Mean Q Value 29.99 - Time Delta 79.409 - Time 2023-11-03T12:19:02


  6%|▌         | 2300/40000 [1:59:48<27:19:16,  2.61s/it]

Episode 2300 - Step 499896 - Epsilon 0.882519834000429 - Mean Reward 655.95 - Mean Length 210.87 - Mean Loss 1.056 - Mean Q Value 30.04 - Time Delta 74.664 - Time 2023-11-03T12:20:16


  6%|▌         | 2301/40000 [1:59:58<51:08:56,  4.88s/it]

MarioNet saved to checkpoints2/mario_net_0.chkpt at step 499896
MarioNet saved to checkpoints2/mario_net_1.chkpt at step 500000


  6%|▌         | 2320/40000 [2:00:59<22:57:16,  2.19s/it]

Episode 2320 - Step 504401 - Epsilon 0.8815264554143938 - Mean Reward 650.57 - Mean Length 214.31 - Mean Loss 1.045 - Mean Q Value 30.034 - Time Delta 65.523 - Time 2023-11-03T12:21:22


  6%|▌         | 2340/40000 [2:01:45<30:39:28,  2.93s/it]

Episode 2340 - Step 507649 - Epsilon 0.8808109463790356 - Mean Reward 640.1 - Mean Length 214.06 - Mean Loss 1.029 - Mean Q Value 30.05 - Time Delta 47.401 - Time 2023-11-03T12:22:09


  6%|▌         | 2360/40000 [2:02:44<22:14:57,  2.13s/it]

Episode 2360 - Step 511465 - Epsilon 0.879971053323773 - Mean Reward 654.11 - Mean Length 222.27 - Mean Loss 1.015 - Mean Q Value 29.88 - Time Delta 55.163 - Time 2023-11-03T12:23:04


  6%|▌         | 2380/40000 [2:03:55<40:20:21,  3.86s/it]

Episode 2380 - Step 516343 - Epsilon 0.8788985825619953 - Mean Reward 655.46 - Mean Length 216.31 - Mean Loss 1.006 - Mean Q Value 29.719 - Time Delta 71.036 - Time 2023-11-03T12:24:15


  6%|▌         | 2400/40000 [2:04:30<8:20:13,  1.25it/s]

Episode 2400 - Step 518879 - Epsilon 0.8783415373929209 - Mean Reward 629.39 - Mean Length 189.83 - Mean Loss 0.976 - Mean Q Value 29.556 - Time Delta 36.782 - Time 2023-11-03T12:24:52


  6%|▌         | 2401/40000 [2:04:34<19:02:21,  1.82s/it]

MarioNet saved to checkpoints2/mario_net_1.chkpt at step 518879


  6%|▌         | 2420/40000 [2:05:26<30:13:54,  2.90s/it]

Episode 2420 - Step 522679 - Epsilon 0.8775075090542129 - Mean Reward 602.64 - Mean Length 182.78 - Mean Loss 0.973 - Mean Q Value 29.536 - Time Delta 54.995 - Time 2023-11-03T12:25:47


  6%|▌         | 2440/40000 [2:06:19<24:16:46,  2.33s/it]

Episode 2440 - Step 526211 - Epsilon 0.8767330118176494 - Mean Reward 626.26 - Mean Length 185.62 - Mean Loss 0.979 - Mean Q Value 29.572 - Time Delta 52.05 - Time 2023-11-03T12:26:39


  6%|▌         | 2460/40000 [2:07:16<41:05:41,  3.94s/it]

Episode 2460 - Step 530142 - Epsilon 0.8758718255772171 - Mean Reward 638.16 - Mean Length 186.77 - Mean Loss 0.987 - Mean Q Value 29.698 - Time Delta 57.741 - Time 2023-11-03T12:27:37


  6%|▌         | 2480/40000 [2:08:11<19:37:13,  1.88s/it]

Episode 2480 - Step 533936 - Epsilon 0.8750414549122874 - Mean Reward 620.42 - Mean Length 175.93 - Mean Loss 1.01 - Mean Q Value 29.874 - Time Delta 55.432 - Time 2023-11-03T12:28:32


  6%|▋         | 2500/40000 [2:09:14<34:50:31,  3.34s/it]

Episode 2500 - Step 538197 - Epsilon 0.8741098131896268 - Mean Reward 650.78 - Mean Length 193.18 - Mean Loss 1.029 - Mean Q Value 30.12 - Time Delta 62.621 - Time 2023-11-03T12:29:35


  6%|▋         | 2501/40000 [2:09:17<33:52:26,  3.25s/it]

MarioNet saved to checkpoints2/mario_net_1.chkpt at step 538197


  6%|▋         | 2520/40000 [2:10:16<43:19:39,  4.16s/it]

Episode 2520 - Step 542409 - Epsilon 0.8731898598816344 - Mean Reward 656.47 - Mean Length 197.3 - Mean Loss 1.036 - Mean Q Value 30.239 - Time Delta 61.457 - Time 2023-11-03T12:30:36


  6%|▋         | 2540/40000 [2:10:56<11:40:42,  1.12s/it]

Episode 2540 - Step 545154 - Epsilon 0.8725908388279368 - Mean Reward 627.97 - Mean Length 189.43 - Mean Loss 1.043 - Mean Q Value 30.38 - Time Delta 40.243 - Time 2023-11-03T12:31:17


  6%|▋         | 2560/40000 [2:11:54<49:23:19,  4.75s/it]

Episode 2560 - Step 549038 - Epsilon 0.8717439642416082 - Mean Reward 617.82 - Mean Length 188.96 - Mean Loss 1.041 - Mean Q Value 30.576 - Time Delta 56.3 - Time 2023-11-03T12:32:13


  6%|▋         | 2580/40000 [2:12:35<17:23:00,  1.67s/it]

Episode 2580 - Step 551878 - Epsilon 0.8711252456206736 - Mean Reward 594.05 - Mean Length 179.42 - Mean Loss 1.024 - Mean Q Value 30.841 - Time Delta 41.156 - Time 2023-11-03T12:32:54


  6%|▋         | 2600/40000 [2:13:25<19:11:12,  1.85s/it]

Episode 2600 - Step 555463 - Epsilon 0.8703448492889718 - Mean Reward 580.4 - Mean Length 172.66 - Mean Loss 1.042 - Mean Q Value 31.105 - Time Delta 51.955 - Time 2023-11-03T12:33:46


  7%|▋         | 2601/40000 [2:13:28<22:30:12,  2.17s/it]

MarioNet saved to checkpoints2/mario_net_1.chkpt at step 555463


  7%|▋         | 2620/40000 [2:14:32<35:02:38,  3.38s/it]

Episode 2620 - Step 560327 - Epsilon 0.8692871530295332 - Mean Reward 604.68 - Mean Length 179.18 - Mean Loss 1.039 - Mean Q Value 31.426 - Time Delta 70.081 - Time 2023-11-03T12:34:56


  7%|▋         | 2640/40000 [2:15:56<50:00:01,  4.82s/it]

Episode 2640 - Step 565869 - Epsilon 0.8680835894895735 - Mean Reward 627.16 - Mean Length 207.15 - Mean Loss 1.036 - Mean Q Value 31.592 - Time Delta 80.379 - Time 2023-11-03T12:36:17


  7%|▋         | 2660/40000 [2:17:03<15:27:46,  1.49s/it]

Episode 2660 - Step 570619 - Epsilon 0.8670533519227529 - Mean Reward 630.95 - Mean Length 215.81 - Mean Loss 1.034 - Mean Q Value 31.67 - Time Delta 69.137 - Time 2023-11-03T12:37:26


  7%|▋         | 2680/40000 [2:17:49<21:28:16,  2.07s/it]

Episode 2680 - Step 573665 - Epsilon 0.8663933420433286 - Mean Reward 634.61 - Mean Length 217.87 - Mean Loss 1.048 - Mean Q Value 31.696 - Time Delta 44.323 - Time 2023-11-03T12:38:10


  7%|▋         | 2700/40000 [2:19:08<32:30:45,  3.14s/it]

Episode 2700 - Step 579274 - Epsilon 0.8651792932264321 - Mean Reward 670.57 - Mean Length 238.11 - Mean Loss 1.016 - Mean Q Value 31.696 - Time Delta 81.335 - Time 2023-11-03T12:39:31


  7%|▋         | 2701/40000 [2:19:13<38:34:20,  3.72s/it]

MarioNet saved to checkpoints2/mario_net_1.chkpt at step 579274


  7%|▋         | 2720/40000 [2:20:09<39:56:12,  3.86s/it]

Episode 2720 - Step 583844 - Epsilon 0.8641913902073567 - Mean Reward 676.47 - Mean Length 235.17 - Mean Loss 1.018 - Mean Q Value 31.599 - Time Delta 65.462 - Time 2023-11-03T12:40:37


  7%|▋         | 2740/40000 [2:21:09<16:42:50,  1.61s/it]

Episode 2740 - Step 587772 - Epsilon 0.8633431706999677 - Mean Reward 685.56 - Mean Length 219.03 - Mean Loss 1.02 - Mean Q Value 31.479 - Time Delta 56.319 - Time 2023-11-03T12:41:33


  7%|▋         | 2760/40000 [2:22:05<27:18:45,  2.64s/it]

Episode 2760 - Step 591284 - Epsilon 0.8625854879726658 - Mean Reward 688.59 - Mean Length 206.65 - Mean Loss 1.003 - Mean Q Value 31.404 - Time Delta 51.181 - Time 2023-11-03T12:42:24


  7%|▋         | 2780/40000 [2:22:43<35:57:38,  3.48s/it]

Episode 2780 - Step 593915 - Epsilon 0.862018308848457 - Mean Reward 676.64 - Mean Length 202.5 - Mean Loss 0.981 - Mean Q Value 31.408 - Time Delta 38.125 - Time 2023-11-03T12:43:02


  7%|▋         | 2800/40000 [2:23:37<63:44:08,  6.17s/it]

Episode 2800 - Step 597694 - Epsilon 0.8612043015265614 - Mean Reward 631.38 - Mean Length 184.2 - Mean Loss 0.997 - Mean Q Value 31.457 - Time Delta 54.742 - Time 2023-11-03T12:43:57


  7%|▋         | 2801/40000 [2:23:39<51:00:02,  4.94s/it]

MarioNet saved to checkpoints2/mario_net_1.chkpt at step 597694


  7%|▋         | 2820/40000 [2:24:41<37:31:26,  3.63s/it]

Episode 2820 - Step 602326 - Epsilon 0.8602076040199009 - Mean Reward 615.38 - Mean Length 184.82 - Mean Loss 1.011 - Mean Q Value 31.626 - Time Delta 66.622 - Time 2023-11-03T12:45:04


  7%|▋         | 2840/40000 [2:25:29<31:09:39,  3.02s/it]

Episode 2840 - Step 605371 - Epsilon 0.8595530200820479 - Mean Reward 585.05 - Mean Length 175.99 - Mean Loss 1.01 - Mean Q Value 31.813 - Time Delta 44.472 - Time 2023-11-03T12:45:48


  7%|▋         | 2860/40000 [2:26:31<35:44:08,  3.46s/it]

Episode 2860 - Step 610070 - Epsilon 0.8585438529210384 - Mean Reward 581.71 - Mean Length 187.86 - Mean Loss 1.047 - Mean Q Value 32.027 - Time Delta 67.839 - Time 2023-11-03T12:46:56


  7%|▋         | 2880/40000 [2:27:29<26:18:34,  2.55s/it]

Episode 2880 - Step 613903 - Epsilon 0.8577215472212136 - Mean Reward 621.56 - Mean Length 199.88 - Mean Loss 1.091 - Mean Q Value 32.107 - Time Delta 55.229 - Time 2023-11-03T12:47:51


  7%|▋         | 2900/40000 [2:28:41<20:41:56,  2.01s/it]

Episode 2900 - Step 618853 - Epsilon 0.8566607731631041 - Mean Reward 647.69 - Mean Length 211.59 - Mean Loss 1.102 - Mean Q Value 32.142 - Time Delta 71.899 - Time 2023-11-03T12:49:03


  7%|▋         | 2901/40000 [2:28:45<26:40:45,  2.59s/it]

MarioNet saved to checkpoints2/mario_net_1.chkpt at step 618853


  7%|▋         | 2920/40000 [2:29:56<63:09:42,  6.13s/it]

Episode 2920 - Step 624111 - Epsilon 0.8555354322283281 - Mean Reward 661.36 - Mean Length 217.85 - Mean Loss 1.095 - Mean Q Value 32.045 - Time Delta 75.83 - Time 2023-11-03T12:50:19


  7%|▋         | 2940/40000 [2:31:02<33:48:44,  3.28s/it]

Episode 2940 - Step 628708 - Epsilon 0.8545527727787121 - Mean Reward 698.6 - Mean Length 233.37 - Mean Loss 1.097 - Mean Q Value 31.802 - Time Delta 66.466 - Time 2023-11-03T12:51:26


  7%|▋         | 2960/40000 [2:32:00<23:18:56,  2.27s/it]

Episode 2960 - Step 632454 - Epsilon 0.853752858625171 - Mean Reward 718.32 - Mean Length 223.84 - Mean Loss 1.078 - Mean Q Value 31.608 - Time Delta 54.153 - Time 2023-11-03T12:52:20


  7%|▋         | 2980/40000 [2:32:43<12:41:23,  1.23s/it]

Episode 2980 - Step 636028 - Epsilon 0.8529903710426265 - Mean Reward 687.16 - Mean Length 221.25 - Mean Loss 1.031 - Mean Q Value 31.447 - Time Delta 51.4 - Time 2023-11-03T12:53:11


  8%|▊         | 3000/40000 [2:33:30<23:37:52,  2.30s/it]

Episode 3000 - Step 638761 - Epsilon 0.8524077643543796 - Mean Reward 665.98 - Mean Length 199.08 - Mean Loss 0.994 - Mean Q Value 31.263 - Time Delta 39.785 - Time 2023-11-03T12:53:51


  8%|▊         | 3001/40000 [2:33:33<23:06:54,  2.25s/it]

MarioNet saved to checkpoints2/mario_net_1.chkpt at step 638761


  8%|▊         | 3020/40000 [2:34:00<13:41:57,  1.33s/it]

Episode 3020 - Step 640755 - Epsilon 0.8519829449257251 - Mean Reward 611.47 - Mean Length 166.44 - Mean Loss 0.96 - Mean Q Value 31.308 - Time Delta 28.938 - Time 2023-11-03T12:54:20


  8%|▊         | 3040/40000 [2:34:43<11:25:17,  1.11s/it]

Episode 3040 - Step 643765 - Epsilon 0.8513420688393382 - Mean Reward 574.07 - Mean Length 150.57 - Mean Loss 0.935 - Mean Q Value 31.536 - Time Delta 43.021 - Time 2023-11-03T12:55:03


  8%|▊         | 3060/40000 [2:36:05<23:34:12,  2.30s/it]

Episode 3060 - Step 649503 - Epsilon 0.8501216940119872 - Mean Reward 560.38 - Mean Length 170.49 - Mean Loss 0.903 - Mean Q Value 31.623 - Time Delta 81.537 - Time 2023-11-03T12:56:24


  8%|▊         | 3080/40000 [2:37:02<41:37:42,  4.06s/it]

Episode 3080 - Step 654258 - Epsilon 0.8491117121487478 - Mean Reward 564.37 - Mean Length 182.3 - Mean Loss 0.901 - Mean Q Value 31.71 - Time Delta 67.639 - Time 2023-11-03T12:57:32


  8%|▊         | 3100/40000 [2:38:06<20:28:08,  2.00s/it]

Episode 3100 - Step 658420 - Epsilon 0.8482286707836963 - Mean Reward 605.75 - Mean Length 196.59 - Mean Loss 0.919 - Mean Q Value 31.783 - Time Delta 59.982 - Time 2023-11-03T12:58:32


  8%|▊         | 3101/40000 [2:38:14<38:36:53,  3.77s/it]

MarioNet saved to checkpoints2/mario_net_1.chkpt at step 658420


  8%|▊         | 3120/40000 [2:39:01<23:40:35,  2.31s/it]

Episode 3120 - Step 661873 - Epsilon 0.8474967532514066 - Mean Reward 629.85 - Mean Length 211.18 - Mean Loss 0.939 - Mean Q Value 31.658 - Time Delta 49.598 - Time 2023-11-03T12:59:22


  8%|▊         | 3140/40000 [2:39:56<22:07:11,  2.16s/it]

Episode 3140 - Step 665712 - Epsilon 0.8466837583390754 - Mean Reward 647.05 - Mean Length 219.47 - Mean Loss 0.943 - Mean Q Value 31.557 - Time Delta 55.099 - Time 2023-11-03T13:00:17


  8%|▊         | 3160/40000 [2:40:56<35:27:56,  3.47s/it]

Episode 3160 - Step 670245 - Epsilon 0.8457247973238451 - Mean Reward 661.95 - Mean Length 207.42 - Mean Loss 0.958 - Mean Q Value 31.648 - Time Delta 65.152 - Time 2023-11-03T13:01:22


  8%|▊         | 3180/40000 [2:41:55<33:07:16,  3.24s/it]

Episode 3180 - Step 674020 - Epsilon 0.8449270209564708 - Mean Reward 690.66 - Mean Length 197.62 - Mean Loss 0.971 - Mean Q Value 31.803 - Time Delta 54.372 - Time 2023-11-03T13:02:16


  8%|▊         | 3200/40000 [2:42:51<18:18:25,  1.79s/it]

Episode 3200 - Step 678006 - Epsilon 0.8440854704469579 - Mean Reward 664.95 - Mean Length 195.86 - Mean Loss 0.972 - Mean Q Value 32.009 - Time Delta 57.313 - Time 2023-11-03T13:03:14


  8%|▊         | 3201/40000 [2:42:55<24:35:14,  2.41s/it]

MarioNet saved to checkpoints2/mario_net_1.chkpt at step 678006


  8%|▊         | 3220/40000 [2:43:30<13:42:37,  1.34s/it]

Episode 3220 - Step 681195 - Epsilon 0.8434127914037816 - Mean Reward 658.77 - Mean Length 193.22 - Mean Loss 0.977 - Mean Q Value 32.341 - Time Delta 45.324 - Time 2023-11-03T13:03:59


  8%|▊         | 3240/40000 [2:44:42<15:46:36,  1.55s/it]

Episode 3240 - Step 685593 - Epsilon 0.8424859685378886 - Mean Reward 644.36 - Mean Length 198.81 - Mean Loss 1.007 - Mean Q Value 32.584 - Time Delta 62.889 - Time 2023-11-03T13:05:02


  8%|▊         | 3260/40000 [2:45:37<27:11:31,  2.66s/it]

Episode 3260 - Step 689466 - Epsilon 0.8416706261881316 - Mean Reward 625.87 - Mean Length 192.21 - Mean Loss 1.027 - Mean Q Value 32.675 - Time Delta 55.109 - Time 2023-11-03T13:05:57


  8%|▊         | 3280/40000 [2:46:34<31:47:10,  3.12s/it]

Episode 3280 - Step 693576 - Epsilon 0.8408062536589888 - Mean Reward 621.67 - Mean Length 195.56 - Mean Loss 1.041 - Mean Q Value 32.527 - Time Delta 58.539 - Time 2023-11-03T13:06:55


  8%|▊         | 3300/40000 [2:47:36<23:36:22,  2.32s/it]

Episode 3300 - Step 697844 - Epsilon 0.8399095917283383 - Mean Reward 639.8 - Mean Length 198.38 - Mean Loss 1.055 - Mean Q Value 32.496 - Time Delta 60.399 - Time 2023-11-03T13:07:56


  8%|▊         | 3301/40000 [2:47:37<19:35:10,  1.92s/it]

MarioNet saved to checkpoints2/mario_net_1.chkpt at step 697844


  8%|▊         | 3320/40000 [2:48:29<38:30:19,  3.78s/it]

Episode 3320 - Step 701540 - Epsilon 0.83913387360547 - Mean Reward 659.86 - Mean Length 203.45 - Mean Loss 1.073 - Mean Q Value 32.401 - Time Delta 52.396 - Time 2023-11-03T13:08:48


  8%|▊         | 3340/40000 [2:49:47<25:52:15,  2.54s/it]

Episode 3340 - Step 707028 - Epsilon 0.8379833712120264 - Mean Reward 711.66 - Mean Length 214.35 - Mean Loss 1.081 - Mean Q Value 32.385 - Time Delta 77.648 - Time 2023-11-03T13:10:06


  8%|▊         | 3360/40000 [2:50:35<19:57:23,  1.96s/it]

Episode 3360 - Step 710400 - Epsilon 0.8372772488141368 - Mean Reward 722.31 - Mean Length 209.34 - Mean Loss 1.107 - Mean Q Value 32.394 - Time Delta 48.825 - Time 2023-11-03T13:10:55


  8%|▊         | 3380/40000 [2:51:43<52:43:17,  5.18s/it]

Episode 3380 - Step 715621 - Epsilon 0.8361851054635635 - Mean Reward 742.71 - Mean Length 220.45 - Mean Loss 1.149 - Mean Q Value 32.668 - Time Delta 74.59 - Time 2023-11-03T13:12:09


  8%|▊         | 3400/40000 [2:52:38<32:18:10,  3.18s/it]

Episode 3400 - Step 719016 - Epsilon 0.835475694365449 - Mean Reward 722.46 - Mean Length 211.72 - Mean Loss 1.182 - Mean Q Value 32.791 - Time Delta 48.633 - Time 2023-11-03T13:12:58


  9%|▊         | 3401/40000 [2:52:40<28:46:55,  2.83s/it]

MarioNet saved to checkpoints2/mario_net_1.chkpt at step 719016


  9%|▊         | 3420/40000 [2:53:50<28:59:39,  2.85s/it]

Episode 3420 - Step 724012 - Epsilon 0.834432836492999 - Mean Reward 723.31 - Mean Length 224.72 - Mean Loss 1.2 - Mean Q Value 32.864 - Time Delta 72.206 - Time 2023-11-03T13:14:10


  9%|▊         | 3440/40000 [2:54:32<17:25:11,  1.72s/it]

Episode 3440 - Step 727257 - Epsilon 0.8337561772767192 - Mean Reward 676.35 - Mean Length 202.29 - Mean Loss 1.205 - Mean Q Value 32.984 - Time Delta 46.9 - Time 2023-11-03T13:14:57


  9%|▊         | 3460/40000 [2:55:23<19:10:10,  1.89s/it]

Episode 3460 - Step 730545 - Epsilon 0.8330711112139974 - Mean Reward 661.54 - Mean Length 201.45 - Mean Loss 1.204 - Mean Q Value 33.064 - Time Delta 47.86 - Time 2023-11-03T13:15:45


  9%|▊         | 3480/40000 [2:56:03<13:24:49,  1.32s/it]

Episode 3480 - Step 733527 - Epsilon 0.8324502880634812 - Mean Reward 624.94 - Mean Length 179.06 - Mean Loss 1.183 - Mean Q Value 33.064 - Time Delta 42.975 - Time 2023-11-03T13:16:28


  9%|▉         | 3500/40000 [2:57:09<25:09:13,  2.48s/it]

Episode 3500 - Step 737955 - Epsilon 0.8315292753539271 - Mean Reward 637.11 - Mean Length 189.39 - Mean Loss 1.189 - Mean Q Value 33.169 - Time Delta 62.507 - Time 2023-11-03T13:17:30


  9%|▉         | 3501/40000 [2:57:12<26:39:19,  2.63s/it]

MarioNet saved to checkpoints2/mario_net_1.chkpt at step 737955


  9%|▉         | 3520/40000 [2:57:53<21:27:03,  2.12s/it]

Episode 3520 - Step 740924 - Epsilon 0.8309123016747714 - Mean Reward 617.71 - Mean Length 169.12 - Mean Loss 1.202 - Mean Q Value 33.313 - Time Delta 42.318 - Time 2023-11-03T13:18:13


  9%|▉         | 3540/40000 [2:58:53<46:57:46,  4.64s/it]

Episode 3540 - Step 745260 - Epsilon 0.8300120806350583 - Mean Reward 647.29 - Mean Length 180.03 - Mean Loss 1.207 - Mean Q Value 33.311 - Time Delta 61.716 - Time 2023-11-03T13:19:14


  9%|▉         | 3560/40000 [2:59:44<31:31:49,  3.11s/it]

Episode 3560 - Step 748676 - Epsilon 0.8293035528139703 - Mean Reward 652.05 - Mean Length 181.31 - Mean Loss 1.199 - Mean Q Value 33.443 - Time Delta 48.916 - Time 2023-11-03T13:20:03


  9%|▉         | 3580/40000 [3:00:43<29:10:32,  2.88s/it]

Episode 3580 - Step 752841 - Epsilon 0.8284404897917609 - Mean Reward 672.1 - Mean Length 193.14 - Mean Loss 1.218 - Mean Q Value 33.673 - Time Delta 58.805 - Time 2023-11-03T13:21:02


  9%|▉         | 3600/40000 [3:01:39<25:15:36,  2.50s/it]

Episode 3600 - Step 756928 - Epsilon 0.8275944629030315 - Mean Reward 673.73 - Mean Length 189.73 - Mean Loss 1.222 - Mean Q Value 33.961 - Time Delta 57.875 - Time 2023-11-03T13:22:00


  9%|▉         | 3601/40000 [3:01:42<26:47:24,  2.65s/it]

MarioNet saved to checkpoints2/mario_net_1.chkpt at step 756928


  9%|▉         | 3620/40000 [3:02:34<22:19:39,  2.21s/it]

Episode 3620 - Step 760710 - Epsilon 0.826812342046385 - Mean Reward 699.03 - Mean Length 197.86 - Mean Loss 1.242 - Mean Q Value 34.206 - Time Delta 54.617 - Time 2023-11-03T13:22:55


  9%|▉         | 3640/40000 [3:03:22<27:47:35,  2.75s/it]

Episode 3640 - Step 764265 - Epsilon 0.8260778389286468 - Mean Reward 680.44 - Mean Length 190.05 - Mean Loss 1.267 - Mean Q Value 34.459 - Time Delta 51.964 - Time 2023-11-03T13:23:47


  9%|▉         | 3660/40000 [3:04:11<29:56:47,  2.97s/it]

Episode 3660 - Step 767374 - Epsilon 0.825436019307486 - Mean Reward 664.58 - Mean Length 186.98 - Mean Loss 1.305 - Mean Q Value 34.671 - Time Delta 45.108 - Time 2023-11-03T13:24:32


  9%|▉         | 3680/40000 [3:05:01<33:05:45,  3.28s/it]

Episode 3680 - Step 771046 - Epsilon 0.8246786166479917 - Mean Reward 641.83 - Mean Length 182.05 - Mean Loss 1.301 - Mean Q Value 34.757 - Time Delta 52.39 - Time 2023-11-03T13:25:24


  9%|▉         | 3700/40000 [3:06:01<19:22:48,  1.92s/it]

Episode 3700 - Step 775193 - Epsilon 0.8238240740353161 - Mean Reward 646.94 - Mean Length 182.65 - Mean Loss 1.307 - Mean Q Value 34.727 - Time Delta 58.675 - Time 2023-11-03T13:26:23


  9%|▉         | 3701/40000 [3:06:04<22:44:13,  2.25s/it]

MarioNet saved to checkpoints2/mario_net_1.chkpt at step 775193


  9%|▉         | 3720/40000 [3:06:51<15:30:09,  1.54s/it]

Episode 3720 - Step 778744 - Epsilon 0.8230930486540127 - Mean Reward 625.1 - Mean Length 180.34 - Mean Loss 1.275 - Mean Q Value 34.692 - Time Delta 50.141 - Time 2023-11-03T13:27:13


  9%|▉         | 3740/40000 [3:07:35<24:39:32,  2.45s/it]

Episode 3740 - Step 781946 - Epsilon 0.8224344262349348 - Mean Reward 597.92 - Mean Length 176.81 - Mean Loss 1.275 - Mean Q Value 34.672 - Time Delta 45.277 - Time 2023-11-03T13:27:58


  9%|▉         | 3760/40000 [3:08:37<20:56:23,  2.08s/it]

Episode 3760 - Step 786058 - Epsilon 0.8215893979579695 - Mean Reward 600.64 - Mean Length 186.84 - Mean Loss 1.264 - Mean Q Value 34.577 - Time Delta 58.174 - Time 2023-11-03T13:28:56


  9%|▉         | 3780/40000 [3:09:19<23:22:36,  2.32s/it]

Episode 3780 - Step 788984 - Epsilon 0.8209886249976822 - Mean Reward 611.39 - Mean Length 179.38 - Mean Loss 1.25 - Mean Q Value 34.593 - Time Delta 42.105 - Time 2023-11-03T13:29:38


 10%|▉         | 3800/40000 [3:10:06<20:06:48,  2.00s/it]

Episode 3800 - Step 792291 - Epsilon 0.8203101530690372 - Mean Reward 568.86 - Mean Length 170.98 - Mean Loss 1.234 - Mean Q Value 34.549 - Time Delta 46.899 - Time 2023-11-03T13:30:25


 10%|▉         | 3801/40000 [3:10:07<17:13:55,  1.71s/it]

MarioNet saved to checkpoints2/mario_net_1.chkpt at step 792291


 10%|▉         | 3820/40000 [3:10:46<12:08:34,  1.21s/it]

Episode 3820 - Step 795063 - Epsilon 0.819741874992942 - Mean Reward 551.03 - Mean Length 163.19 - Mean Loss 1.239 - Mean Q Value 34.497 - Time Delta 39.725 - Time 2023-11-03T13:31:05


 10%|▉         | 3840/40000 [3:11:50<43:44:41,  4.36s/it]

Episode 3840 - Step 799861 - Epsilon 0.8187591839776743 - Mean Reward 592.72 - Mean Length 179.15 - Mean Loss 1.249 - Mean Q Value 34.531 - Time Delta 68.469 - Time 2023-11-03T13:32:13


 10%|▉         | 3860/40000 [3:12:49<17:19:07,  1.73s/it]

Episode 3860 - Step 803720 - Epsilon 0.8179696668606872 - Mean Reward 614.5 - Mean Length 176.62 - Mean Loss 1.236 - Mean Q Value 34.464 - Time Delta 54.928 - Time 2023-11-03T13:33:08


 10%|▉         | 3880/40000 [3:13:43<37:36:38,  3.75s/it]

Episode 3880 - Step 807502 - Epsilon 0.81719664194905 - Mean Reward 633.43 - Mean Length 185.18 - Mean Loss 1.222 - Mean Q Value 34.374 - Time Delta 53.504 - Time 2023-11-03T13:34:02


 10%|▉         | 3900/40000 [3:14:29<17:36:03,  1.76s/it]

Episode 3900 - Step 810763 - Epsilon 0.8165306937973686 - Mean Reward 658.94 - Mean Length 184.72 - Mean Loss 1.232 - Mean Q Value 34.292 - Time Delta 46.392 - Time 2023-11-03T13:34:48


 10%|▉         | 3901/40000 [3:14:30<15:29:49,  1.55s/it]

MarioNet saved to checkpoints2/mario_net_1.chkpt at step 810763


 10%|▉         | 3920/40000 [3:15:22<32:37:29,  3.26s/it]

Episode 3920 - Step 814445 - Epsilon 0.8157794230251324 - Mean Reward 706.52 - Mean Length 193.82 - Mean Loss 1.24 - Mean Q Value 34.414 - Time Delta 52.992 - Time 2023-11-03T13:35:41


 10%|▉         | 3940/40000 [3:16:09<31:26:33,  3.14s/it]

Episode 3940 - Step 817843 - Epsilon 0.8150867125890019 - Mean Reward 689.11 - Mean Length 179.82 - Mean Loss 1.218 - Mean Q Value 34.536 - Time Delta 48.125 - Time 2023-11-03T13:36:29


 10%|▉         | 3960/40000 [3:16:55<19:20:02,  1.93s/it]

Episode 3960 - Step 821234 - Epsilon 0.8143960155525437 - Mean Reward 671.66 - Mean Length 175.14 - Mean Loss 1.237 - Mean Q Value 34.739 - Time Delta 48.049 - Time 2023-11-03T13:37:17


 10%|▉         | 3980/40000 [3:17:53<15:38:21,  1.56s/it]

Episode 3980 - Step 825051 - Epsilon 0.8136192487313106 - Mean Reward 644.7 - Mean Length 175.49 - Mean Loss 1.266 - Mean Q Value 34.989 - Time Delta 54.816 - Time 2023-11-03T13:38:12


 10%|█         | 4000/40000 [3:18:32<20:10:39,  2.02s/it]

Episode 4000 - Step 827773 - Epsilon 0.8130657691060872 - Mean Reward 624.91 - Mean Length 170.1 - Mean Loss 1.26 - Mean Q Value 35.202 - Time Delta 38.66 - Time 2023-11-03T13:38:51


 10%|█         | 4001/40000 [3:18:33<17:19:09,  1.73s/it]

MarioNet saved to checkpoints2/mario_net_1.chkpt at step 827773


 10%|█         | 4020/40000 [3:19:11<17:38:28,  1.77s/it]

Episode 4020 - Step 830681 - Epsilon 0.812474885030391 - Mean Reward 602.82 - Mean Length 162.36 - Mean Loss 1.279 - Mean Q Value 35.316 - Time Delta 41.699 - Time 2023-11-03T13:39:33


 10%|█         | 4040/40000 [3:20:09<30:14:27,  3.03s/it]

Episode 4040 - Step 834706 - Epsilon 0.8116577432684732 - Mean Reward 621.33 - Mean Length 168.63 - Mean Loss 1.311 - Mean Q Value 35.37 - Time Delta 57.103 - Time 2023-11-03T13:40:30


 10%|█         | 4060/40000 [3:21:09<19:34:25,  1.96s/it]

Episode 4060 - Step 838901 - Epsilon 0.810806963309475 - Mean Reward 637.05 - Mean Length 176.67 - Mean Loss 1.333 - Mean Q Value 35.483 - Time Delta 59.094 - Time 2023-11-03T13:41:29


 10%|█         | 4080/40000 [3:22:14<27:14:42,  2.73s/it]

Episode 4080 - Step 843843 - Epsilon 0.8098058297586258 - Mean Reward 679.93 - Mean Length 187.92 - Mean Loss 1.358 - Mean Q Value 35.436 - Time Delta 70.543 - Time 2023-11-03T13:42:39


 10%|█         | 4100/40000 [3:23:16<34:12:25,  3.43s/it]

Episode 4100 - Step 847866 - Epsilon 0.8089917768789394 - Mean Reward 703.04 - Mean Length 200.93 - Mean Loss 1.369 - Mean Q Value 35.47 - Time Delta 57.217 - Time 2023-11-03T13:43:37


 10%|█         | 4101/40000 [3:23:18<30:00:53,  3.01s/it]

MarioNet saved to checkpoints2/mario_net_1.chkpt at step 847866


 10%|█         | 4120/40000 [3:24:01<15:07:29,  1.52s/it]

Episode 4120 - Step 851028 - Epsilon 0.8083525214982977 - Mean Reward 697.46 - Mean Length 203.47 - Mean Loss 1.377 - Mean Q Value 35.39 - Time Delta 44.971 - Time 2023-11-03T13:44:22


 10%|█         | 4140/40000 [3:24:42<23:55:41,  2.40s/it]

Episode 4140 - Step 853933 - Epsilon 0.8077656685328949 - Mean Reward 668.48 - Mean Length 192.27 - Mean Loss 1.365 - Mean Q Value 35.272 - Time Delta 41.321 - Time 2023-11-03T13:45:03


 10%|█         | 4160/40000 [3:25:55<31:13:27,  3.14s/it]

Episode 4160 - Step 859040 - Epsilon 0.8067350116721956 - Mean Reward 689.95 - Mean Length 201.39 - Mean Loss 1.361 - Mean Q Value 35.147 - Time Delta 71.832 - Time 2023-11-03T13:46:15


 10%|█         | 4180/40000 [3:26:47<23:03:39,  2.32s/it]

Episode 4180 - Step 862763 - Epsilon 0.8059844922933018 - Mean Reward 654.31 - Mean Length 189.2 - Mean Loss 1.346 - Mean Q Value 35.087 - Time Delta 53.129 - Time 2023-11-03T13:47:08


 10%|█         | 4200/40000 [3:27:56<33:18:47,  3.35s/it]

Episode 4200 - Step 867519 - Epsilon 0.8050267461050551 - Mean Reward 657.18 - Mean Length 196.53 - Mean Loss 1.366 - Mean Q Value 35.001 - Time Delta 68.019 - Time 2023-11-03T13:48:16


 11%|█         | 4201/40000 [3:27:58<29:16:56,  2.94s/it]

MarioNet saved to checkpoints2/mario_net_1.chkpt at step 867519


 11%|█         | 4220/40000 [3:28:53<21:35:39,  2.17s/it]

Episode 4220 - Step 871872 - Epsilon 0.8041511521580061 - Mean Reward 684.37 - Mean Length 208.44 - Mean Loss 1.352 - Mean Q Value 34.94 - Time Delta 62.207 - Time 2023-11-03T13:49:18


 11%|█         | 4240/40000 [3:29:51<34:06:20,  3.43s/it]

Episode 4240 - Step 875544 - Epsilon 0.8034132800431057 - Mean Reward 704.26 - Mean Length 216.11 - Mean Loss 1.336 - Mean Q Value 34.948 - Time Delta 51.991 - Time 2023-11-03T13:50:10


 11%|█         | 4260/40000 [3:30:49<48:22:35,  4.87s/it]

Episode 4260 - Step 879847 - Epsilon 0.8025494728026268 - Mean Reward 683.61 - Mean Length 208.07 - Mean Loss 1.331 - Mean Q Value 34.931 - Time Delta 61.096 - Time 2023-11-03T13:51:11


 11%|█         | 4280/40000 [3:31:52<24:42:57,  2.49s/it]

Episode 4280 - Step 884113 - Epsilon 0.8016940099394878 - Mean Reward 701.44 - Mean Length 213.5 - Mean Loss 1.351 - Mean Q Value 34.888 - Time Delta 60.287 - Time 2023-11-03T13:52:11


 11%|█         | 4300/40000 [3:32:43<26:01:59,  2.63s/it]

Episode 4300 - Step 887996 - Epsilon 0.8009161430002496 - Mean Reward 708.48 - Mean Length 204.77 - Mean Loss 1.341 - Mean Q Value 34.941 - Time Delta 54.956 - Time 2023-11-03T13:53:06


 11%|█         | 4301/40000 [3:32:48<33:15:16,  3.35s/it]

MarioNet saved to checkpoints2/mario_net_1.chkpt at step 887996


 11%|█         | 4320/40000 [3:33:42<29:54:11,  3.02s/it]

Episode 4320 - Step 892036 - Epsilon 0.8001076259644769 - Mean Reward 713.16 - Mean Length 201.64 - Mean Loss 1.344 - Mean Q Value 34.97 - Time Delta 56.929 - Time 2023-11-03T13:54:03


 11%|█         | 4340/40000 [3:34:30<26:28:43,  2.67s/it]

Episode 4340 - Step 895405 - Epsilon 0.799434018944789 - Mean Reward 703.47 - Mean Length 198.61 - Mean Loss 1.33 - Mean Q Value 35.037 - Time Delta 48.083 - Time 2023-11-03T13:54:51


 11%|█         | 4360/40000 [3:35:23<21:31:44,  2.17s/it]

Episode 4360 - Step 899295 - Epsilon 0.7986569471765078 - Mean Reward 709.58 - Mean Length 194.48 - Mean Loss 1.322 - Mean Q Value 35.082 - Time Delta 55.595 - Time 2023-11-03T13:55:47


 11%|█         | 4380/40000 [3:36:14<19:38:20,  1.98s/it]

Episode 4380 - Step 902761 - Epsilon 0.7979652105833746 - Mean Reward 703.63 - Mean Length 186.48 - Mean Loss 1.299 - Mean Q Value 35.23 - Time Delta 49.011 - Time 2023-11-03T13:56:36


 11%|█         | 4400/40000 [3:37:13<29:30:57,  2.98s/it]

Episode 4400 - Step 906840 - Epsilon 0.7971519002152084 - Mean Reward 703.47 - Mean Length 188.44 - Mean Loss 1.281 - Mean Q Value 35.296 - Time Delta 57.946 - Time 2023-11-03T13:57:34


 11%|█         | 4401/40000 [3:37:16<30:06:18,  3.04s/it]

MarioNet saved to checkpoints2/mario_net_1.chkpt at step 906840


 11%|█         | 4420/40000 [3:37:55<14:42:21,  1.49s/it]

Episode 4420 - Step 910005 - Epsilon 0.7965214031685518 - Mean Reward 673.86 - Mean Length 179.69 - Mean Loss 1.277 - Mean Q Value 35.431 - Time Delta 45.586 - Time 2023-11-03T13:58:20


 11%|█         | 4440/40000 [3:38:58<21:44:13,  2.20s/it]

Episode 4440 - Step 914021 - Epsilon 0.7957220968986365 - Mean Reward 665.14 - Mean Length 186.16 - Mean Loss 1.312 - Mean Q Value 35.41 - Time Delta 57.273 - Time 2023-11-03T13:59:17


 11%|█         | 4460/40000 [3:39:59<35:17:26,  3.57s/it]

Episode 4460 - Step 918421 - Epsilon 0.7948472837180343 - Mean Reward 675.69 - Mean Length 191.26 - Mean Loss 1.321 - Mean Q Value 35.342 - Time Delta 62.648 - Time 2023-11-03T14:00:19


 11%|█         | 4480/40000 [3:40:47<23:30:36,  2.38s/it]

Episode 4480 - Step 922055 - Epsilon 0.7941254927936242 - Mean Reward 679.0 - Mean Length 192.94 - Mean Loss 1.343 - Mean Q Value 35.252 - Time Delta 51.896 - Time 2023-11-03T14:01:11


 11%|█▏        | 4500/40000 [3:41:59<46:52:12,  4.75s/it]

Episode 4500 - Step 926912 - Epsilon 0.7931618109871249 - Mean Reward 677.69 - Mean Length 200.72 - Mean Loss 1.371 - Mean Q Value 35.093 - Time Delta 68.812 - Time 2023-11-03T14:02:20


 11%|█▏        | 4501/40000 [3:42:02<42:17:31,  4.29s/it]

MarioNet saved to checkpoints2/mario_net_1.chkpt at step 926912


 11%|█▏        | 4520/40000 [3:42:41<15:03:22,  1.53s/it]

Episode 4520 - Step 929996 - Epsilon 0.7925505188377616 - Mean Reward 681.3 - Mean Length 199.91 - Mean Loss 1.338 - Mean Q Value 34.93 - Time Delta 43.982 - Time 2023-11-03T14:03:04


 11%|█▏        | 4540/40000 [3:43:46<31:04:02,  3.15s/it]

Episode 4540 - Step 934356 - Epsilon 0.7916871093078602 - Mean Reward 711.08 - Mean Length 203.35 - Mean Loss 1.326 - Mean Q Value 34.781 - Time Delta 61.505 - Time 2023-11-03T14:04:06


 11%|█▏        | 4560/40000 [3:44:33<20:49:17,  2.12s/it]

Episode 4560 - Step 937629 - Epsilon 0.7910395762077437 - Mean Reward 692.38 - Mean Length 192.08 - Mean Loss 1.318 - Mean Q Value 34.735 - Time Delta 47.12 - Time 2023-11-03T14:04:53


 11%|█▏        | 4580/40000 [3:45:20<33:54:08,  3.45s/it]

Episode 4580 - Step 941168 - Epsilon 0.7903400133698928 - Mean Reward 690.14 - Mean Length 191.13 - Mean Loss 1.291 - Mean Q Value 34.688 - Time Delta 50.614 - Time 2023-11-03T14:05:43


 12%|█▏        | 4600/40000 [3:46:22<34:47:17,  3.54s/it]

Episode 4600 - Step 945671 - Epsilon 0.7894507886048641 - Mean Reward 715.55 - Mean Length 187.59 - Mean Loss 1.28 - Mean Q Value 34.805 - Time Delta 63.918 - Time 2023-11-03T14:06:47


 12%|█▏        | 4601/40000 [3:46:29<45:20:23,  4.61s/it]

MarioNet saved to checkpoints2/mario_net_1.chkpt at step 945671


 12%|█▏        | 4620/40000 [3:47:26<24:05:13,  2.45s/it]

Episode 4620 - Step 949824 - Epsilon 0.7886315665733048 - Mean Reward 749.61 - Mean Length 198.28 - Mean Loss 1.302 - Mean Q Value 34.928 - Time Delta 58.724 - Time 2023-11-03T14:07:46


 12%|█▏        | 4640/40000 [3:48:30<39:31:14,  4.02s/it]

Episode 4640 - Step 954405 - Epsilon 0.7877289031450349 - Mean Reward 750.14 - Mean Length 200.49 - Mean Loss 1.29 - Mean Q Value 35.064 - Time Delta 65.577 - Time 2023-11-03T14:08:52


 12%|█▏        | 4660/40000 [3:49:22<20:16:42,  2.07s/it]

Episode 4660 - Step 958047 - Epsilon 0.7870120023076409 - Mean Reward 743.27 - Mean Length 204.18 - Mean Loss 1.294 - Mean Q Value 35.17 - Time Delta 51.657 - Time 2023-11-03T14:09:43


 12%|█▏        | 4680/40000 [3:50:23<34:59:45,  3.57s/it]

Episode 4680 - Step 962328 - Epsilon 0.7861701531816924 - Mean Reward 754.6 - Mean Length 211.6 - Mean Loss 1.293 - Mean Q Value 35.271 - Time Delta 61.293 - Time 2023-11-03T14:10:45


 12%|█▏        | 4700/40000 [3:51:07<18:40:01,  1.90s/it]

Episode 4700 - Step 965256 - Epsilon 0.7855948871306357 - Mean Reward 713.19 - Mean Length 195.85 - Mean Loss 1.291 - Mean Q Value 35.224 - Time Delta 42.246 - Time 2023-11-03T14:11:27


 12%|█▏        | 4701/40000 [3:51:09<19:19:30,  1.97s/it]

MarioNet saved to checkpoints2/mario_net_1.chkpt at step 965256


 12%|█▏        | 4720/40000 [3:51:59<19:37:46,  2.00s/it]

Episode 4720 - Step 969299 - Epsilon 0.7848012481520119 - Mean Reward 689.92 - Mean Length 194.75 - Mean Loss 1.31 - Mean Q Value 35.318 - Time Delta 57.744 - Time 2023-11-03T14:12:25


 12%|█▏        | 4740/40000 [3:52:47<19:01:36,  1.94s/it]

Episode 4740 - Step 972446 - Epsilon 0.7841840485155347 - Mean Reward 674.77 - Mean Length 180.41 - Mean Loss 1.327 - Mean Q Value 35.464 - Time Delta 45.022 - Time 2023-11-03T14:13:10


 12%|█▏        | 4760/40000 [3:53:43<24:03:43,  2.46s/it]

Episode 4760 - Step 976260 - Epsilon 0.7834366852937885 - Mean Reward 706.18 - Mean Length 182.13 - Mean Loss 1.336 - Mean Q Value 35.595 - Time Delta 54.009 - Time 2023-11-03T14:14:04


 12%|█▏        | 4780/40000 [3:54:26<21:23:07,  2.19s/it]

Episode 4780 - Step 979481 - Epsilon 0.7828060767568789 - Mean Reward 694.16 - Mean Length 171.53 - Mean Loss 1.335 - Mean Q Value 35.782 - Time Delta 45.857 - Time 2023-11-03T14:14:49


 12%|█▏        | 4800/40000 [3:55:18<32:21:41,  3.31s/it]

Episode 4800 - Step 982954 - Epsilon 0.7821267002727142 - Mean Reward 713.0 - Mean Length 176.98 - Mean Loss 1.342 - Mean Q Value 36.068 - Time Delta 50.076 - Time 2023-11-03T14:15:40


 12%|█▏        | 4801/40000 [3:55:21<32:27:14,  3.32s/it]

MarioNet saved to checkpoints2/mario_net_1.chkpt at step 982954


 12%|█▏        | 4820/40000 [3:56:06<17:26:42,  1.79s/it]

Episode 4820 - Step 986409 - Epsilon 0.781451429925217 - Mean Reward 715.27 - Mean Length 171.1 - Mean Loss 1.336 - Mean Q Value 36.3 - Time Delta 49.416 - Time 2023-11-03T14:16:29


 12%|█▏        | 4840/40000 [3:57:09<37:59:34,  3.89s/it]

Episode 4840 - Step 990678 - Epsilon 0.7806178706694951 - Mean Reward 735.97 - Mean Length 182.32 - Mean Loss 1.334 - Mean Q Value 36.536 - Time Delta 60.665 - Time 2023-11-03T14:17:30


 12%|█▏        | 4860/40000 [3:58:03<40:20:32,  4.13s/it]

Episode 4860 - Step 994473 - Epsilon 0.7798776105884629 - Mean Reward 713.81 - Mean Length 182.13 - Mean Loss 1.347 - Mean Q Value 36.728 - Time Delta 54.227 - Time 2023-11-03T14:18:24


 12%|█▏        | 4880/40000 [3:59:00<29:06:04,  2.98s/it]

Episode 4880 - Step 998494 - Epsilon 0.7790940324343156 - Mean Reward 723.9 - Mean Length 190.13 - Mean Loss 1.354 - Mean Q Value 36.932 - Time Delta 56.82 - Time 2023-11-03T14:19:21


 12%|█▏        | 4889/40000 [3:59:22<18:58:47,  1.95s/it]

MarioNet saved to checkpoints2/mario_net_2.chkpt at step 1000000


 12%|█▏        | 4900/40000 [3:59:47<24:32:34,  2.52s/it]

Episode 4900 - Step 1001737 - Epsilon 0.7784626378546677 - Mean Reward 714.97 - Mean Length 187.83 - Mean Loss 1.343 - Mean Q Value 37.089 - Time Delta 46.007 - Time 2023-11-03T14:20:07


 12%|█▏        | 4901/40000 [3:59:48<20:10:41,  2.07s/it]

MarioNet saved to checkpoints2/mario_net_2.chkpt at step 1001737


 12%|█▏        | 4920/40000 [4:00:40<18:01:53,  1.85s/it]

Episode 4920 - Step 1005515 - Epsilon 0.7777277269175706 - Mean Reward 716.17 - Mean Length 191.06 - Mean Loss 1.337 - Mean Q Value 37.066 - Time Delta 54.053 - Time 2023-11-03T14:21:01


 12%|█▏        | 4940/40000 [4:01:29<26:12:34,  2.69s/it]

Episode 4940 - Step 1008958 - Epsilon 0.7770585857158462 - Mean Reward 691.86 - Mean Length 182.8 - Mean Loss 1.322 - Mean Q Value 37.156 - Time Delta 48.699 - Time 2023-11-03T14:21:49


 12%|█▏        | 4960/40000 [4:02:20<25:47:25,  2.65s/it]

Episode 4960 - Step 1012709 - Epsilon 0.776330240492188 - Mean Reward 702.47 - Mean Length 182.36 - Mean Loss 1.298 - Mean Q Value 37.281 - Time Delta 53.587 - Time 2023-11-03T14:22:43


 12%|█▏        | 4980/40000 [4:03:07<14:22:22,  1.48s/it]

Episode 4980 - Step 1016044 - Epsilon 0.7756832448270053 - Mean Reward 694.93 - Mean Length 175.5 - Mean Loss 1.298 - Mean Q Value 37.421 - Time Delta 47.968 - Time 2023-11-03T14:23:31


 12%|█▎        | 5000/40000 [4:04:07<24:29:39,  2.52s/it]

Episode 5000 - Step 1020010 - Epsilon 0.7749145359439666 - Mean Reward 711.79 - Mean Length 182.73 - Mean Loss 1.306 - Mean Q Value 37.56 - Time Delta 56.351 - Time 2023-11-03T14:24:27


 13%|█▎        | 5001/40000 [4:04:09<23:50:13,  2.45s/it]

MarioNet saved to checkpoints2/mario_net_2.chkpt at step 1020010


 13%|█▎        | 5020/40000 [4:05:11<19:01:27,  1.96s/it]

Episode 5020 - Step 1024654 - Epsilon 0.7740153821150124 - Mean Reward 723.17 - Mean Length 191.39 - Mean Loss 1.31 - Mean Q Value 37.733 - Time Delta 65.792 - Time 2023-11-03T14:25:33


 13%|█▎        | 5040/40000 [4:06:11<36:29:36,  3.76s/it]

Episode 5040 - Step 1028932 - Epsilon 0.773188015073693 - Mean Reward 755.8 - Mean Length 199.74 - Mean Loss 1.307 - Mean Q Value 37.866 - Time Delta 60.764 - Time 2023-11-03T14:26:34


 13%|█▎        | 5060/40000 [4:07:08<30:20:00,  3.13s/it]

Episode 5060 - Step 1032706 - Epsilon 0.7724588561250492 - Mean Reward 752.75 - Mean Length 199.97 - Mean Loss 1.328 - Mean Q Value 38.06 - Time Delta 54.322 - Time 2023-11-03T14:27:28


 13%|█▎        | 5080/40000 [4:07:59<14:44:45,  1.52s/it]

Episode 5080 - Step 1036248 - Epsilon 0.7717751464795133 - Mean Reward 760.88 - Mean Length 202.04 - Mean Loss 1.349 - Mean Q Value 38.133 - Time Delta 51.005 - Time 2023-11-03T14:28:19


 13%|█▎        | 5100/40000 [4:08:43<33:43:18,  3.48s/it]

Episode 5100 - Step 1039225 - Epsilon 0.7712009664481394 - Mean Reward 730.2 - Mean Length 192.15 - Mean Loss 1.35 - Mean Q Value 38.27 - Time Delta 43.348 - Time 2023-11-03T14:29:03


 13%|█▎        | 5101/40000 [4:08:44<26:37:03,  2.75s/it]

MarioNet saved to checkpoints2/mario_net_2.chkpt at step 1039225


 13%|█▎        | 5120/40000 [4:09:45<44:03:59,  4.55s/it]

Episode 5120 - Step 1043702 - Epsilon 0.7703382825280327 - Mean Reward 743.12 - Mean Length 190.48 - Mean Loss 1.378 - Mean Q Value 38.4 - Time Delta 64.942 - Time 2023-11-03T14:30:07


 13%|█▎        | 5140/40000 [4:10:37<24:05:40,  2.49s/it]

Episode 5140 - Step 1047082 - Epsilon 0.7696876215408567 - Mean Reward 718.37 - Mean Length 181.5 - Mean Loss 1.406 - Mean Q Value 38.598 - Time Delta 49.05 - Time 2023-11-03T14:30:57


 13%|█▎        | 5160/40000 [4:11:12<20:51:43,  2.16s/it]

Episode 5160 - Step 1050470 - Epsilon 0.7690359720563801 - Mean Reward 712.93 - Mean Length 177.64 - Mean Loss 1.412 - Mean Q Value 38.768 - Time Delta 48.219 - Time 2023-11-03T14:31:45


 13%|█▎        | 5180/40000 [4:12:19<27:07:31,  2.80s/it]

Episode 5180 - Step 1054227 - Epsilon 0.7683139940412808 - Mean Reward 703.76 - Mean Length 179.79 - Mean Loss 1.408 - Mean Q Value 38.943 - Time Delta 53.869 - Time 2023-11-03T14:32:39


 13%|█▎        | 5200/40000 [4:13:14<24:36:20,  2.55s/it]

Episode 5200 - Step 1058141 - Epsilon 0.7675625664000053 - Mean Reward 733.62 - Mean Length 189.16 - Mean Loss 1.414 - Mean Q Value 39.056 - Time Delta 55.533 - Time 2023-11-03T14:33:34


 13%|█▎        | 5201/40000 [4:13:16<22:23:05,  2.32s/it]

MarioNet saved to checkpoints2/mario_net_2.chkpt at step 1058141


 13%|█▎        | 5220/40000 [4:14:04<18:33:46,  1.92s/it]

Episode 5220 - Step 1061612 - Epsilon 0.7668968027996516 - Mean Reward 716.38 - Mean Length 179.1 - Mean Loss 1.427 - Mean Q Value 39.183 - Time Delta 49.651 - Time 2023-11-03T14:34:24


 13%|█▎        | 5240/40000 [4:14:59<39:58:20,  4.14s/it]

Episode 5240 - Step 1065425 - Epsilon 0.7661661066543255 - Mean Reward 725.4 - Mean Length 183.43 - Mean Loss 1.449 - Mean Q Value 39.339 - Time Delta 54.149 - Time 2023-11-03T14:35:18


 13%|█▎        | 5260/40000 [4:15:39<21:26:48,  2.22s/it]

Episode 5260 - Step 1068695 - Epsilon 0.7655400217308688 - Mean Reward 726.28 - Mean Length 182.25 - Mean Loss 1.457 - Mean Q Value 39.484 - Time Delta 46.462 - Time 2023-11-03T14:36:04


 13%|█▎        | 5280/40000 [4:16:26<20:40:38,  2.14s/it]

Episode 5280 - Step 1071938 - Epsilon 0.7649196116133471 - Mean Reward 702.08 - Mean Length 177.11 - Mean Loss 1.496 - Mean Q Value 39.638 - Time Delta 46.268 - Time 2023-11-03T14:36:51


 13%|█▎        | 5300/40000 [4:17:33<18:54:33,  1.96s/it]

Episode 5300 - Step 1076317 - Epsilon 0.7640826739661317 - Mean Reward 712.23 - Mean Length 181.76 - Mean Loss 1.527 - Mean Q Value 39.752 - Time Delta 62.385 - Time 2023-11-03T14:37:53


 13%|█▎        | 5301/40000 [4:17:35<19:34:13,  2.03s/it]

MarioNet saved to checkpoints2/mario_net_2.chkpt at step 1076317


 13%|█▎        | 5320/40000 [4:18:13<23:41:27,  2.46s/it]

Episode 5320 - Step 1079144 - Epsilon 0.7635428492517751 - Mean Reward 689.75 - Mean Length 175.32 - Mean Loss 1.531 - Mean Q Value 39.896 - Time Delta 40.903 - Time 2023-11-03T14:38:34


 13%|█▎        | 5340/40000 [4:18:55<22:33:07,  2.34s/it]

Episode 5340 - Step 1081921 - Epsilon 0.7630129435271791 - Mean Reward 671.57 - Mean Length 164.96 - Mean Loss 1.555 - Mean Q Value 39.997 - Time Delta 40.108 - Time 2023-11-03T14:39:14


 13%|█▎        | 5360/40000 [4:19:43<30:26:18,  3.16s/it]

Episode 5360 - Step 1085491 - Epsilon 0.7623322581909133 - Mean Reward 672.11 - Mean Length 167.96 - Mean Loss 1.592 - Mean Q Value 40.089 - Time Delta 51.608 - Time 2023-11-03T14:40:06


 13%|█▎        | 5380/40000 [4:20:35<27:24:51,  2.85s/it]

Episode 5380 - Step 1088887 - Epsilon 0.7616853126899786 - Mean Reward 699.5 - Mean Length 169.49 - Mean Loss 1.615 - Mean Q Value 40.226 - Time Delta 48.308 - Time 2023-11-03T14:40:54


 14%|█▎        | 5400/40000 [4:21:22<19:24:09,  2.02s/it]

Episode 5400 - Step 1092397 - Epsilon 0.7610172269099926 - Mean Reward 682.59 - Mean Length 160.8 - Mean Loss 1.656 - Mean Q Value 40.392 - Time Delta 50.072 - Time 2023-11-03T14:41:44


 14%|█▎        | 5401/40000 [4:21:26<24:37:45,  2.56s/it]

MarioNet saved to checkpoints2/mario_net_2.chkpt at step 1092397


 14%|█▎        | 5420/40000 [4:22:12<23:40:59,  2.47s/it]

Episode 5420 - Step 1096214 - Epsilon 0.7602913725087429 - Mean Reward 705.92 - Mean Length 170.7 - Mean Loss 1.695 - Mean Q Value 40.541 - Time Delta 55.305 - Time 2023-11-03T14:42:39


 14%|█▎        | 5440/40000 [4:22:54<16:03:20,  1.67s/it]

Episode 5440 - Step 1098708 - Epsilon 0.7598174785304824 - Mean Reward 693.5 - Mean Length 167.87 - Mean Loss 1.688 - Mean Q Value 40.74 - Time Delta 35.955 - Time 2023-11-03T14:43:15


 14%|█▎        | 5460/40000 [4:23:31<27:58:44,  2.92s/it]

Episode 5460 - Step 1101366 - Epsilon 0.7593127474683029 - Mean Reward 667.11 - Mean Length 158.75 - Mean Loss 1.687 - Mean Q Value 40.855 - Time Delta 38.136 - Time 2023-11-03T14:43:53


 14%|█▎        | 5480/40000 [4:24:14<24:38:36,  2.57s/it]

Episode 5480 - Step 1104421 - Epsilon 0.7587330436874401 - Mean Reward 649.5 - Mean Length 155.34 - Mean Loss 1.656 - Mean Q Value 40.887 - Time Delta 43.898 - Time 2023-11-03T14:44:37


 14%|█▍        | 5500/40000 [4:25:20<24:25:54,  2.55s/it]

Episode 5500 - Step 1108836 - Epsilon 0.7578960539843973 - Mean Reward 659.83 - Mean Length 164.39 - Mean Loss 1.608 - Mean Q Value 40.966 - Time Delta 63.079 - Time 2023-11-03T14:45:40


 14%|█▍        | 5501/40000 [4:25:22<22:40:33,  2.37s/it]

MarioNet saved to checkpoints2/mario_net_2.chkpt at step 1108836


 14%|█▍        | 5520/40000 [4:26:01<16:23:54,  1.71s/it]

Episode 5520 - Step 1111705 - Epsilon 0.7573526478739893 - Mean Reward 641.92 - Mean Length 154.91 - Mean Loss 1.552 - Mean Q Value 40.936 - Time Delta 41.648 - Time 2023-11-03T14:46:22


 14%|█▍        | 5540/40000 [4:26:51<24:45:44,  2.59s/it]

Episode 5540 - Step 1115268 - Epsilon 0.7566783362846239 - Mean Reward 652.63 - Mean Length 165.6 - Mean Loss 1.526 - Mean Q Value 40.883 - Time Delta 50.708 - Time 2023-11-03T14:47:13


 14%|█▍        | 5560/40000 [4:27:55<27:56:05,  2.92s/it]

Episode 5560 - Step 1119604 - Epsilon 0.7558585412746712 - Mean Reward 715.35 - Mean Length 182.38 - Mean Loss 1.492 - Mean Q Value 40.838 - Time Delta 62.451 - Time 2023-11-03T14:48:15


 14%|█▍        | 5580/40000 [4:29:01<18:29:55,  1.93s/it]

Episode 5580 - Step 1124406 - Epsilon 0.7549516774363193 - Mean Reward 734.48 - Mean Length 199.85 - Mean Loss 1.481 - Mean Q Value 40.728 - Time Delta 70.327 - Time 2023-11-03T14:49:26


 14%|█▍        | 5600/40000 [4:29:49<39:26:03,  4.13s/it]

Episode 5600 - Step 1127388 - Epsilon 0.7543890706281297 - Mean Reward 709.2 - Mean Length 185.52 - Mean Loss 1.47 - Mean Q Value 40.638 - Time Delta 44.684 - Time 2023-11-03T14:50:10


 14%|█▍        | 5601/40000 [4:29:52<35:41:07,  3.73s/it]

MarioNet saved to checkpoints2/mario_net_2.chkpt at step 1127388


 14%|█▍        | 5620/40000 [4:30:50<35:50:04,  3.75s/it]

Episode 5620 - Step 1131739 - Epsilon 0.753568929948797 - Mean Reward 729.13 - Mean Length 200.34 - Mean Loss 1.467 - Mean Q Value 40.726 - Time Delta 63.277 - Time 2023-11-03T14:51:14


 14%|█▍        | 5640/40000 [4:31:34<14:04:55,  1.48s/it]

Episode 5640 - Step 1134723 - Epsilon 0.7530069770912059 - Mean Reward 734.17 - Mean Length 194.55 - Mean Loss 1.457 - Mean Q Value 40.671 - Time Delta 43.449 - Time 2023-11-03T14:51:57


 14%|█▍        | 5660/40000 [4:32:18<16:12:15,  1.70s/it]

Episode 5660 - Step 1137628 - Epsilon 0.7524603042402894 - Mean Reward 671.94 - Mean Length 180.24 - Mean Loss 1.427 - Mean Q Value 40.726 - Time Delta 41.999 - Time 2023-11-03T14:52:39


 14%|█▍        | 5680/40000 [4:33:09<19:21:47,  2.03s/it]

Episode 5680 - Step 1141067 - Epsilon 0.7518136544312171 - Mean Reward 647.78 - Mean Length 166.61 - Mean Loss 1.42 - Mean Q Value 40.814 - Time Delta 49.453 - Time 2023-11-03T14:53:28


 14%|█▍        | 5700/40000 [4:34:15<28:04:21,  2.95s/it]

Episode 5700 - Step 1145658 - Epsilon 0.7509512552054466 - Mean Reward 678.03 - Mean Length 182.7 - Mean Loss 1.415 - Mean Q Value 40.797 - Time Delta 66.043 - Time 2023-11-03T14:54:34


 14%|█▍        | 5701/40000 [4:34:16<22:27:09,  2.36s/it]

MarioNet saved to checkpoints2/mario_net_2.chkpt at step 1145658


 14%|█▍        | 5720/40000 [4:35:06<25:56:21,  2.72s/it]

Episode 5720 - Step 1149434 - Epsilon 0.7502426916264913 - Mean Reward 674.11 - Mean Length 176.95 - Mean Loss 1.41 - Mean Q Value 40.651 - Time Delta 53.62 - Time 2023-11-03T14:55:28


 14%|█▍        | 5740/40000 [4:36:14<22:29:21,  2.36s/it]

Episode 5740 - Step 1154041 - Epsilon 0.7493790969164122 - Mean Reward 680.63 - Mean Length 193.18 - Mean Loss 1.4 - Mean Q Value 40.505 - Time Delta 66.811 - Time 2023-11-03T14:56:35


 14%|█▍        | 5760/40000 [4:37:12<34:27:32,  3.62s/it]

Episode 5760 - Step 1157955 - Epsilon 0.7486461880125225 - Mean Reward 721.58 - Mean Length 203.27 - Mean Loss 1.396 - Mean Q Value 40.386 - Time Delta 56.032 - Time 2023-11-03T14:57:31


 14%|█▍        | 5780/40000 [4:38:06<14:10:53,  1.49s/it]

Episode 5780 - Step 1161959 - Epsilon 0.7478971680316325 - Mean Reward 735.9 - Mean Length 208.92 - Mean Loss 1.384 - Mean Q Value 40.239 - Time Delta 57.044 - Time 2023-11-03T14:58:28


 14%|█▍        | 5800/40000 [4:38:59<25:52:51,  2.72s/it]

Episode 5800 - Step 1165440 - Epsilon 0.7472465935620151 - Mean Reward 710.09 - Mean Length 197.82 - Mean Loss 1.38 - Mean Q Value 40.084 - Time Delta 49.82 - Time 2023-11-03T14:59:18


 15%|█▍        | 5801/40000 [4:39:00<20:59:21,  2.21s/it]

MarioNet saved to checkpoints2/mario_net_2.chkpt at step 1165440


 15%|█▍        | 5820/40000 [4:39:36<23:46:04,  2.50s/it]

Episode 5820 - Step 1168061 - Epsilon 0.7467571205514407 - Mean Reward 685.57 - Mean Length 186.27 - Mean Loss 1.38 - Mean Q Value 40.042 - Time Delta 37.367 - Time 2023-11-03T14:59:55


 15%|█▍        | 5840/40000 [4:40:27<30:33:34,  3.22s/it]

Episode 5840 - Step 1171653 - Epsilon 0.7460868335772013 - Mean Reward 696.86 - Mean Length 176.12 - Mean Loss 1.38 - Mean Q Value 40.005 - Time Delta 51.827 - Time 2023-11-03T15:00:47


 15%|█▍        | 5860/40000 [4:41:13<18:53:10,  1.99s/it]

Episode 5860 - Step 1175091 - Epsilon 0.7454458473668095 - Mean Reward 673.33 - Mean Length 171.36 - Mean Loss 1.377 - Mean Q Value 39.905 - Time Delta 49.637 - Time 2023-11-03T15:01:37


 15%|█▍        | 5880/40000 [4:41:53<9:45:47,  1.03s/it] 

Episode 5880 - Step 1177777 - Epsilon 0.7449454484454235 - Mean Reward 647.65 - Mean Length 158.18 - Mean Loss 1.359 - Mean Q Value 39.893 - Time Delta 38.874 - Time 2023-11-03T15:02:16


 15%|█▍        | 5900/40000 [4:42:41<14:52:47,  1.57s/it]

Episode 5900 - Step 1181033 - Epsilon 0.7443393095065536 - Mean Reward 645.25 - Mean Length 155.93 - Mean Loss 1.359 - Mean Q Value 39.975 - Time Delta 46.332 - Time 2023-11-03T15:03:02


 15%|█▍        | 5901/40000 [4:42:44<19:19:22,  2.04s/it]

MarioNet saved to checkpoints2/mario_net_2.chkpt at step 1181033


 15%|█▍        | 5920/40000 [4:43:46<33:21:06,  3.52s/it]

Episode 5920 - Step 1185484 - Epsilon 0.7435115064906216 - Mean Reward 684.35 - Mean Length 174.23 - Mean Loss 1.351 - Mean Q Value 39.899 - Time Delta 64.477 - Time 2023-11-03T15:04:06


 15%|█▍        | 5940/40000 [4:44:38<24:53:55,  2.63s/it]

Episode 5940 - Step 1189084 - Epsilon 0.7428426470829593 - Mean Reward 674.3 - Mean Length 174.31 - Mean Loss 1.332 - Mean Q Value 39.799 - Time Delta 51.944 - Time 2023-11-03T15:04:58


 15%|█▍        | 5960/40000 [4:45:37<18:17:26,  1.93s/it]

Episode 5960 - Step 1193101 - Epsilon 0.7420970217211175 - Mean Reward 702.38 - Mean Length 180.1 - Mean Loss 1.335 - Mean Q Value 39.641 - Time Delta 58.059 - Time 2023-11-03T15:05:56


 15%|█▍        | 5980/40000 [4:46:29<39:03:48,  4.13s/it]

Episode 5980 - Step 1197038 - Epsilon 0.7413669719708014 - Mean Reward 741.78 - Mean Length 192.61 - Mean Loss 1.337 - Mean Q Value 39.531 - Time Delta 57.039 - Time 2023-11-03T15:06:53


 15%|█▌        | 6000/40000 [4:47:34<25:16:06,  2.68s/it]

Episode 6000 - Step 1201183 - Epsilon 0.7405991282577419 - Mean Reward 757.41 - Mean Length 201.5 - Mean Loss 1.314 - Mean Q Value 39.4 - Time Delta 59.889 - Time 2023-11-03T15:07:53


 15%|█▌        | 6001/40000 [4:47:35<20:35:08,  2.18s/it]

MarioNet saved to checkpoints2/mario_net_2.chkpt at step 1201183


 15%|█▌        | 6020/40000 [4:48:34<24:40:43,  2.61s/it]

Episode 6020 - Step 1205298 - Epsilon 0.7398376285728118 - Mean Reward 739.26 - Mean Length 198.14 - Mean Loss 1.317 - Mean Q Value 39.21 - Time Delta 59.769 - Time 2023-11-03T15:08:53


 15%|█▌        | 6040/40000 [4:49:24<13:50:00,  1.47s/it]

Episode 6040 - Step 1208853 - Epsilon 0.7391803899018659 - Mean Reward 738.04 - Mean Length 197.69 - Mean Loss 1.333 - Mean Q Value 39.063 - Time Delta 51.536 - Time 2023-11-03T15:09:45


 15%|█▌        | 6060/40000 [4:50:35<26:01:20,  2.76s/it]

Episode 6060 - Step 1213701 - Epsilon 0.7382850458453625 - Mean Reward 765.07 - Mean Length 206.0 - Mean Loss 1.349 - Mean Q Value 39.002 - Time Delta 70.887 - Time 2023-11-03T15:10:55


 15%|█▌        | 6080/40000 [4:51:34<27:51:57,  2.96s/it]

Episode 6080 - Step 1217596 - Epsilon 0.7375664905954159 - Mean Reward 759.22 - Mean Length 205.58 - Mean Loss 1.35 - Mean Q Value 38.911 - Time Delta 57.546 - Time 2023-11-03T15:11:53


 15%|█▌        | 6100/40000 [4:52:36<35:26:14,  3.76s/it]

Episode 6100 - Step 1221836 - Epsilon 0.736785084236131 - Mean Reward 767.93 - Mean Length 206.53 - Mean Loss 1.365 - Mean Q Value 38.757 - Time Delta 62.211 - Time 2023-11-03T15:12:55


 15%|█▌        | 6101/40000 [4:52:37<27:46:33,  2.95s/it]

MarioNet saved to checkpoints2/mario_net_2.chkpt at step 1221836


 15%|█▌        | 6120/40000 [4:53:40<21:58:18,  2.33s/it]

Episode 6120 - Step 1226487 - Epsilon 0.7359288851415038 - Mean Reward 787.72 - Mean Length 211.89 - Mean Loss 1.37 - Mean Q Value 38.724 - Time Delta 67.827 - Time 2023-11-03T15:14:03


 15%|█▌        | 6140/40000 [4:54:24<13:52:10,  1.47s/it]

Episode 6140 - Step 1229455 - Epsilon 0.7353830283783263 - Mean Reward 780.99 - Mean Length 206.02 - Mean Loss 1.374 - Mean Q Value 38.738 - Time Delta 43.361 - Time 2023-11-03T15:14:46


 15%|█▌        | 6160/40000 [4:55:12<29:23:49,  3.13s/it]

Episode 6160 - Step 1232659 - Epsilon 0.7347942223472089 - Mean Reward 734.37 - Mean Length 189.58 - Mean Loss 1.381 - Mean Q Value 38.803 - Time Delta 47.07 - Time 2023-11-03T15:15:33


 15%|█▌        | 6180/40000 [4:56:09<30:20:49,  3.23s/it]

Episode 6180 - Step 1236826 - Epsilon 0.734029148947137 - Mean Reward 732.81 - Mean Length 192.3 - Mean Loss 1.384 - Mean Q Value 38.877 - Time Delta 60.679 - Time 2023-11-03T15:16:34


 16%|█▌        | 6200/40000 [4:57:00<24:35:54,  2.62s/it]

Episode 6200 - Step 1240225 - Epsilon 0.7334056725368558 - Mean Reward 727.18 - Mean Length 183.89 - Mean Loss 1.377 - Mean Q Value 38.959 - Time Delta 49.877 - Time 2023-11-03T15:17:24


 16%|█▌        | 6201/40000 [4:57:06<32:47:52,  3.49s/it]

MarioNet saved to checkpoints2/mario_net_2.chkpt at step 1240225


 16%|█▌        | 6220/40000 [4:57:47<13:05:14,  1.39s/it]

Episode 6220 - Step 1243196 - Epsilon 0.732861137657111 - Mean Reward 699.67 - Mean Length 167.09 - Mean Loss 1.383 - Mean Q Value 38.979 - Time Delta 43.47 - Time 2023-11-03T15:18:07


 16%|█▌        | 6240/40000 [4:58:30<30:16:08,  3.23s/it]

Episode 6240 - Step 1246078 - Epsilon 0.7323333013172513 - Mean Reward 690.93 - Mean Length 166.23 - Mean Loss 1.396 - Mean Q Value 39.157 - Time Delta 42.029 - Time 2023-11-03T15:18:50


 16%|█▌        | 6260/40000 [4:59:27<22:04:31,  2.36s/it]

Episode 6260 - Step 1250047 - Epsilon 0.731607003902022 - Mean Reward 701.35 - Mean Length 173.88 - Mean Loss 1.376 - Mean Q Value 39.375 - Time Delta 57.263 - Time 2023-11-03T15:19:47


 16%|█▌        | 6280/40000 [5:00:06<18:19:54,  1.96s/it]

Episode 6280 - Step 1252696 - Epsilon 0.7311226574999817 - Mean Reward 670.18 - Mean Length 158.7 - Mean Loss 1.39 - Mean Q Value 39.574 - Time Delta 38.521 - Time 2023-11-03T15:20:25


 16%|█▌        | 6300/40000 [5:00:50<21:05:50,  2.25s/it]

Episode 6300 - Step 1255885 - Epsilon 0.7305400021802154 - Mean Reward 657.7 - Mean Length 156.6 - Mean Loss 1.416 - Mean Q Value 39.849 - Time Delta 46.105 - Time 2023-11-03T15:21:11


 16%|█▌        | 6301/40000 [5:00:53<22:57:49,  2.45s/it]

MarioNet saved to checkpoints2/mario_net_2.chkpt at step 1255885


 16%|█▌        | 6320/40000 [5:01:51<31:20:09,  3.35s/it]

Episode 6320 - Step 1259998 - Epsilon 0.7297892103959934 - Mean Reward 669.93 - Mean Length 168.02 - Mean Loss 1.412 - Mean Q Value 40.116 - Time Delta 59.675 - Time 2023-11-03T15:22:11


 16%|█▌        | 6340/40000 [5:02:45<32:23:39,  3.46s/it]

Episode 6340 - Step 1264008 - Epsilon 0.7290579632199521 - Mean Reward 706.22 - Mean Length 179.3 - Mean Loss 1.401 - Mean Q Value 40.27 - Time Delta 58.644 - Time 2023-11-03T15:23:10


 16%|█▌        | 6360/40000 [5:03:40<19:45:07,  2.11s/it]

Episode 6360 - Step 1267780 - Epsilon 0.7283707855298446 - Mean Reward 680.93 - Mean Length 177.33 - Mean Loss 1.403 - Mean Q Value 40.239 - Time Delta 54.279 - Time 2023-11-03T15:24:04


 16%|█▌        | 6380/40000 [5:04:23<12:49:04,  1.37s/it]

Episode 6380 - Step 1270525 - Epsilon 0.72787111248567 - Mean Reward 673.26 - Mean Length 178.29 - Mean Loss 1.393 - Mean Q Value 40.22 - Time Delta 40.456 - Time 2023-11-03T15:24:44


 16%|█▌        | 6400/40000 [5:05:24<16:08:11,  1.73s/it]

Episode 6400 - Step 1274965 - Epsilon 0.7270636236889518 - Mean Reward 690.72 - Mean Length 190.8 - Mean Loss 1.4 - Mean Q Value 40.108 - Time Delta 64.059 - Time 2023-11-03T15:25:49


 16%|█▌        | 6401/40000 [5:05:30<29:56:33,  3.21s/it]

MarioNet saved to checkpoints2/mario_net_2.chkpt at step 1274965


 16%|█▌        | 6420/40000 [5:06:16<20:03:18,  2.15s/it]

Episode 6420 - Step 1278232 - Epsilon 0.7264700368390535 - Mean Reward 687.82 - Mean Length 182.34 - Mean Loss 1.403 - Mean Q Value 40.024 - Time Delta 47.611 - Time 2023-11-03T15:26:36


 16%|█▌        | 6440/40000 [5:07:10<25:36:28,  2.75s/it]

Episode 6440 - Step 1281987 - Epsilon 0.7257883830079942 - Mean Reward 679.14 - Mean Length 179.79 - Mean Loss 1.434 - Mean Q Value 39.956 - Time Delta 54.419 - Time 2023-11-03T15:27:31


 16%|█▌        | 6460/40000 [5:08:05<33:04:58,  3.55s/it]

Episode 6460 - Step 1285675 - Epsilon 0.7251195144309557 - Mean Reward 696.9 - Mean Length 178.95 - Mean Loss 1.46 - Mean Q Value 39.94 - Time Delta 53.76 - Time 2023-11-03T15:28:24


 16%|█▌        | 6480/40000 [5:08:52<13:15:23,  1.42s/it]

Episode 6480 - Step 1288869 - Epsilon 0.7245407375336814 - Mean Reward 725.88 - Mean Length 183.44 - Mean Loss 1.481 - Mean Q Value 39.955 - Time Delta 46.859 - Time 2023-11-03T15:29:11


 16%|█▋        | 6500/40000 [5:09:32<27:50:24,  2.99s/it]

Episode 6500 - Step 1291795 - Epsilon 0.7240109297187092 - Mean Reward 703.85 - Mean Length 168.3 - Mean Loss 1.488 - Mean Q Value 40.072 - Time Delta 42.874 - Time 2023-11-03T15:29:54


 16%|█▋        | 6501/40000 [5:09:36<29:29:30,  3.17s/it]

MarioNet saved to checkpoints2/mario_net_2.chkpt at step 1291795


 16%|█▋        | 6520/40000 [5:10:19<27:27:55,  2.95s/it]

Episode 6520 - Step 1294976 - Epsilon 0.7234353888346041 - Mean Reward 691.28 - Mean Length 167.44 - Mean Loss 1.504 - Mean Q Value 40.272 - Time Delta 46.511 - Time 2023-11-03T15:30:41


 16%|█▋        | 6540/40000 [5:11:18<18:54:36,  2.03s/it]

Episode 6540 - Step 1299386 - Epsilon 0.7226382407273008 - Mean Reward 704.19 - Mean Length 173.99 - Mean Loss 1.488 - Mean Q Value 40.447 - Time Delta 64.126 - Time 2023-11-03T15:31:45


 16%|█▋        | 6560/40000 [5:12:22<30:35:10,  3.29s/it]

Episode 6560 - Step 1303345 - Epsilon 0.7219233632724213 - Mean Reward 716.01 - Mean Length 176.7 - Mean Loss 1.476 - Mean Q Value 40.581 - Time Delta 57.898 - Time 2023-11-03T15:32:43


 16%|█▋        | 6580/40000 [5:13:08<23:27:33,  2.53s/it]

Episode 6580 - Step 1306362 - Epsilon 0.7213790578045621 - Mean Reward 712.32 - Mean Length 174.93 - Mean Loss 1.496 - Mean Q Value 40.736 - Time Delta 44.785 - Time 2023-11-03T15:33:27


 16%|█▋        | 6600/40000 [5:13:54<19:38:23,  2.12s/it]

Episode 6600 - Step 1309514 - Epsilon 0.7208108349451094 - Mean Reward 721.58 - Mean Length 177.19 - Mean Loss 1.491 - Mean Q Value 40.84 - Time Delta 46.644 - Time 2023-11-03T15:34:14


 17%|█▋        | 6601/40000 [5:13:56<19:07:50,  2.06s/it]

MarioNet saved to checkpoints2/mario_net_2.chkpt at step 1309514


 17%|█▋        | 6620/40000 [5:14:49<18:46:51,  2.03s/it]

Episode 6620 - Step 1313219 - Epsilon 0.7201434929362094 - Mean Reward 743.6 - Mean Length 182.43 - Mean Loss 1.52 - Mean Q Value 40.956 - Time Delta 54.322 - Time 2023-11-03T15:35:08


 17%|█▋        | 6640/40000 [5:15:52<29:06:59,  3.14s/it]

Episode 6640 - Step 1317709 - Epsilon 0.7193355852872838 - Mean Reward 769.05 - Mean Length 183.23 - Mean Loss 1.528 - Mean Q Value 41.135 - Time Delta 66.661 - Time 2023-11-03T15:36:15


 17%|█▋        | 6660/40000 [5:16:52<19:57:32,  2.16s/it]

Episode 6660 - Step 1321540 - Epsilon 0.718646971356936 - Mean Reward 767.57 - Mean Length 181.95 - Mean Loss 1.551 - Mean Q Value 41.411 - Time Delta 56.025 - Time 2023-11-03T15:37:11


 17%|█▋        | 6680/40000 [5:17:40<11:03:18,  1.19s/it]

Episode 6680 - Step 1324887 - Epsilon 0.7180458949388507 - Mean Reward 778.87 - Mean Length 185.25 - Mean Loss 1.535 - Mean Q Value 41.633 - Time Delta 49.006 - Time 2023-11-03T15:38:00


 17%|█▋        | 6700/40000 [5:18:40<29:45:24,  3.22s/it]

Episode 6700 - Step 1328979 - Epsilon 0.7173117094963344 - Mean Reward 796.09 - Mean Length 194.65 - Mean Loss 1.55 - Mean Q Value 41.964 - Time Delta 59.985 - Time 2023-11-03T15:39:00


 17%|█▋        | 6701/40000 [5:18:42<26:37:06,  2.88s/it]

MarioNet saved to checkpoints2/mario_net_2.chkpt at step 1328979


 17%|█▋        | 6720/40000 [5:19:27<18:05:17,  1.96s/it]

Episode 6720 - Step 1332314 - Epsilon 0.7167139000301853 - Mean Reward 783.05 - Mean Length 190.95 - Mean Loss 1.546 - Mean Q Value 42.324 - Time Delta 48.938 - Time 2023-11-03T15:39:49


 17%|█▋        | 6740/40000 [5:20:28<22:48:45,  2.47s/it]

Episode 6740 - Step 1336352 - Epsilon 0.7159907423328689 - Mean Reward 740.52 - Mean Length 186.43 - Mean Loss 1.581 - Mean Q Value 42.557 - Time Delta 60.041 - Time 2023-11-03T15:40:49


 17%|█▋        | 6760/40000 [5:21:18<32:30:17,  3.52s/it]

Episode 6760 - Step 1339732 - Epsilon 0.7153859856256151 - Mean Reward 717.05 - Mean Length 181.92 - Mean Loss 1.596 - Mean Q Value 42.723 - Time Delta 49.765 - Time 2023-11-03T15:41:39


 17%|█▋        | 6780/40000 [5:22:15<29:46:26,  3.23s/it]

Episode 6780 - Step 1343729 - Epsilon 0.7146714931278331 - Mean Reward 733.48 - Mean Length 188.42 - Mean Loss 1.654 - Mean Q Value 42.86 - Time Delta 58.915 - Time 2023-11-03T15:42:38


 17%|█▋        | 6800/40000 [5:22:55<20:39:30,  2.24s/it]

Episode 6800 - Step 1346245 - Epsilon 0.7142221060494724 - Mean Reward 699.46 - Mean Length 172.66 - Mean Loss 1.671 - Mean Q Value 42.85 - Time Delta 37.804 - Time 2023-11-03T15:43:15


 17%|█▋        | 6801/40000 [5:22:57<20:06:58,  2.18s/it]

MarioNet saved to checkpoints2/mario_net_2.chkpt at step 1346245


 17%|█▋        | 6820/40000 [5:23:44<24:38:57,  2.67s/it]

Episode 6820 - Step 1349615 - Epsilon 0.7136206272583784 - Mean Reward 701.8 - Mean Length 173.01 - Mean Loss 1.678 - Mean Q Value 42.807 - Time Delta 50.403 - Time 2023-11-03T15:44:06


 17%|█▋        | 6840/40000 [5:24:32<12:51:37,  1.40s/it]

Episode 6840 - Step 1352743 - Epsilon 0.7130627939992864 - Mean Reward 687.01 - Mean Length 163.91 - Mean Loss 1.707 - Mean Q Value 42.716 - Time Delta 46.371 - Time 2023-11-03T15:44:52


 17%|█▋        | 6860/40000 [5:25:32<31:51:14,  3.46s/it]

Episode 6860 - Step 1356994 - Epsilon 0.7123053889575741 - Mean Reward 720.77 - Mean Length 172.62 - Mean Loss 1.717 - Mean Q Value 42.668 - Time Delta 62.877 - Time 2023-11-03T15:45:55


 17%|█▋        | 6880/40000 [5:26:21<13:53:54,  1.51s/it]

Episode 6880 - Step 1360361 - Epsilon 0.7117060581001812 - Mean Reward 699.4 - Mean Length 166.32 - Mean Loss 1.702 - Mean Q Value 42.68 - Time Delta 49.922 - Time 2023-11-03T15:46:45


 17%|█▋        | 6900/40000 [5:27:24<25:58:32,  2.83s/it]

Episode 6900 - Step 1364477 - Epsilon 0.7109740891374173 - Mean Reward 715.04 - Mean Length 182.32 - Mean Loss 1.692 - Mean Q Value 42.63 - Time Delta 59.948 - Time 2023-11-03T15:47:45


 17%|█▋        | 6901/40000 [5:27:27<25:24:05,  2.76s/it]

MarioNet saved to checkpoints2/mario_net_2.chkpt at step 1364477


 17%|█▋        | 6920/40000 [5:28:33<34:04:49,  3.71s/it]

Episode 6920 - Step 1369297 - Epsilon 0.7101178812217341 - Mean Reward 729.0 - Mean Length 196.82 - Mean Loss 1.664 - Mean Q Value 42.611 - Time Delta 70.359 - Time 2023-11-03T15:48:55


 17%|█▋        | 6940/40000 [5:29:21<15:39:13,  1.70s/it]

Episode 6940 - Step 1372617 - Epsilon 0.7095287278390707 - Mean Reward 725.13 - Mean Length 198.74 - Mean Loss 1.639 - Mean Q Value 42.632 - Time Delta 49.013 - Time 2023-11-03T15:49:44


 17%|█▋        | 6960/40000 [5:30:19<28:18:48,  3.08s/it]

Episode 6960 - Step 1376256 - Epsilon 0.7088835275281689 - Mean Reward 727.4 - Mean Length 192.62 - Mean Loss 1.632 - Mean Q Value 42.7 - Time Delta 54.051 - Time 2023-11-03T15:50:38


 17%|█▋        | 6980/40000 [5:31:22<30:41:09,  3.35s/it]

Episode 6980 - Step 1380620 - Epsilon 0.7081105572348692 - Mean Reward 740.44 - Mean Length 202.59 - Mean Loss 1.634 - Mean Q Value 42.716 - Time Delta 63.837 - Time 2023-11-03T15:51:42


 18%|█▊        | 7000/40000 [5:32:03<14:56:28,  1.63s/it]

Episode 7000 - Step 1383335 - Epsilon 0.7076300902109629 - Mean Reward 733.55 - Mean Length 188.58 - Mean Loss 1.662 - Mean Q Value 42.83 - Time Delta 40.133 - Time 2023-11-03T15:52:22


 18%|█▊        | 7001/40000 [5:32:04<13:41:31,  1.49s/it]

MarioNet saved to checkpoints2/mario_net_2.chkpt at step 1383335


 18%|█▊        | 7020/40000 [5:32:47<17:33:23,  1.92s/it]

Episode 7020 - Step 1386465 - Epsilon 0.7070765861828003 - Mean Reward 716.98 - Mean Length 171.68 - Mean Loss 1.682 - Mean Q Value 43.001 - Time Delta 46.151 - Time 2023-11-03T15:53:09


 18%|█▊        | 7040/40000 [5:33:31<13:16:16,  1.45s/it]

Episode 7040 - Step 1389549 - Epsilon 0.7065316401703449 - Mean Reward 724.6 - Mean Length 169.32 - Mean Loss 1.671 - Mean Q Value 43.16 - Time Delta 45.476 - Time 2023-11-03T15:53:54


 18%|█▊        | 7060/40000 [5:34:15<10:33:16,  1.15s/it]

Episode 7060 - Step 1392291 - Epsilon 0.7060474786357469 - Mean Reward 672.46 - Mean Length 160.35 - Mean Loss 1.654 - Mean Q Value 43.262 - Time Delta 40.64 - Time 2023-11-03T15:54:35


 18%|█▊        | 7080/40000 [5:35:01<23:18:13,  2.55s/it]

Episode 7080 - Step 1395443 - Epsilon 0.705491332303419 - Mean Reward 659.92 - Mean Length 148.23 - Mean Loss 1.623 - Mean Q Value 43.287 - Time Delta 46.418 - Time 2023-11-03T15:55:21


 18%|█▊        | 7100/40000 [5:35:52<20:24:22,  2.23s/it]

Episode 7100 - Step 1399005 - Epsilon 0.7048633718343145 - Mean Reward 664.62 - Mean Length 156.7 - Mean Loss 1.562 - Mean Q Value 43.462 - Time Delta 52.431 - Time 2023-11-03T15:56:14


 18%|█▊        | 7101/40000 [5:35:55<22:20:23,  2.44s/it]

MarioNet saved to checkpoints2/mario_net_2.chkpt at step 1399005


 18%|█▊        | 7120/40000 [5:37:00<27:30:16,  3.01s/it]

Episode 7120 - Step 1403617 - Epsilon 0.7040511326108412 - Mean Reward 704.73 - Mean Length 171.52 - Mean Loss 1.561 - Mean Q Value 43.551 - Time Delta 67.04 - Time 2023-11-03T15:57:21


 18%|█▊        | 7140/40000 [5:37:43<23:54:29,  2.62s/it]

Episode 7140 - Step 1406510 - Epsilon 0.7035421116623714 - Mean Reward 679.25 - Mean Length 169.61 - Mean Loss 1.558 - Mean Q Value 43.669 - Time Delta 42.41 - Time 2023-11-03T15:58:03


 18%|█▊        | 7160/40000 [5:38:47<37:48:32,  4.14s/it]

Episode 7160 - Step 1410980 - Epsilon 0.7027563423846088 - Mean Reward 739.67 - Mean Length 186.89 - Mean Loss 1.574 - Mean Q Value 43.761 - Time Delta 64.939 - Time 2023-11-03T15:59:08


 18%|█▊        | 7180/40000 [5:39:36<19:15:00,  2.11s/it]

Episode 7180 - Step 1414337 - Epsilon 0.7021668014711977 - Mean Reward 734.02 - Mean Length 188.94 - Mean Loss 1.626 - Mean Q Value 43.884 - Time Delta 48.928 - Time 2023-11-03T15:59:57


 18%|█▊        | 7200/40000 [5:40:36<24:15:05,  2.66s/it]

Episode 7200 - Step 1418456 - Epsilon 0.7014441172728347 - Mean Reward 761.96 - Mean Length 194.51 - Mean Loss 1.661 - Mean Q Value 43.879 - Time Delta 59.914 - Time 2023-11-03T16:00:57


 18%|█▊        | 7201/40000 [5:40:39<23:13:01,  2.55s/it]

MarioNet saved to checkpoints2/mario_net_2.chkpt at step 1418456


 18%|█▊        | 7220/40000 [5:41:25<18:41:29,  2.05s/it]

Episode 7220 - Step 1421849 - Epsilon 0.7008493695089881 - Mean Reward 731.6 - Mean Length 182.32 - Mean Loss 1.694 - Mean Q Value 43.818 - Time Delta 49.3 - Time 2023-11-03T16:01:46


 18%|█▊        | 7240/40000 [5:42:24<38:57:52,  4.28s/it]

Episode 7240 - Step 1425838 - Epsilon 0.7001507957720509 - Mean Reward 750.9 - Mean Length 193.28 - Mean Loss 1.704 - Mean Q Value 43.821 - Time Delta 58.091 - Time 2023-11-03T16:02:44


 18%|█▊        | 7260/40000 [5:43:16<24:04:22,  2.65s/it]

Episode 7260 - Step 1429496 - Epsilon 0.6995108004716789 - Mean Reward 738.09 - Mean Length 185.16 - Mean Loss 1.72 - Mean Q Value 43.865 - Time Delta 54.224 - Time 2023-11-03T16:03:38


 18%|█▊        | 7280/40000 [5:44:18<43:57:03,  4.84s/it]

Episode 7280 - Step 1433631 - Epsilon 0.698788054724819 - Mean Reward 751.08 - Mean Length 192.94 - Mean Loss 1.716 - Mean Q Value 43.888 - Time Delta 60.589 - Time 2023-11-03T16:04:39


 18%|█▊        | 7300/40000 [5:45:19<34:42:41,  3.82s/it]

Episode 7300 - Step 1437749 - Epsilon 0.6980690225178304 - Mean Reward 749.66 - Mean Length 192.93 - Mean Loss 1.728 - Mean Q Value 43.889 - Time Delta 60.357 - Time 2023-11-03T16:05:39


 18%|█▊        | 7301/40000 [5:45:21<30:00:38,  3.30s/it]

MarioNet saved to checkpoints2/mario_net_2.chkpt at step 1437749


 18%|█▊        | 7320/40000 [5:46:02<28:09:35,  3.10s/it]

Episode 7320 - Step 1440678 - Epsilon 0.6975580485153365 - Mean Reward 729.66 - Mean Length 188.29 - Mean Loss 1.72 - Mean Q Value 43.875 - Time Delta 42.342 - Time 2023-11-03T16:06:22


 18%|█▊        | 7340/40000 [5:46:52<23:47:57,  2.62s/it]

Episode 7340 - Step 1444198 - Epsilon 0.6969444673711604 - Mean Reward 729.57 - Mean Length 183.6 - Mean Loss 1.731 - Mean Q Value 43.832 - Time Delta 50.731 - Time 2023-11-03T16:07:12


 18%|█▊        | 7360/40000 [5:47:59<26:11:59,  2.89s/it]

Episode 7360 - Step 1449291 - Epsilon 0.6960576474086698 - Mean Reward 713.56 - Mean Length 197.95 - Mean Loss 1.732 - Mean Q Value 43.612 - Time Delta 73.809 - Time 2023-11-03T16:08:26


 18%|█▊        | 7380/40000 [5:49:00<30:17:26,  3.34s/it]

Episode 7380 - Step 1453002 - Epsilon 0.695412179308325 - Mean Reward 714.46 - Mean Length 193.71 - Mean Loss 1.69 - Mean Q Value 43.447 - Time Delta 54.476 - Time 2023-11-03T16:09:21


 18%|█▊        | 7400/40000 [5:49:52<16:20:40,  1.80s/it]

Episode 7400 - Step 1456415 - Epsilon 0.6948190718625561 - Mean Reward 681.07 - Mean Length 186.66 - Mean Loss 1.659 - Mean Q Value 43.24 - Time Delta 50.581 - Time 2023-11-03T16:10:11


 19%|█▊        | 7401/40000 [5:49:53<14:31:24,  1.60s/it]

MarioNet saved to checkpoints2/mario_net_2.chkpt at step 1456415


 19%|█▊        | 7420/40000 [5:51:04<39:06:31,  4.32s/it]

Episode 7420 - Step 1461817 - Epsilon 0.693881351926571 - Mean Reward 703.21 - Mean Length 211.39 - Mean Loss 1.629 - Mean Q Value 43.121 - Time Delta 77.742 - Time 2023-11-03T16:11:29


 19%|█▊        | 7440/40000 [5:51:52<13:01:51,  1.44s/it]

Episode 7440 - Step 1464741 - Epsilon 0.693374309941287 - Mean Reward 695.96 - Mean Length 205.43 - Mean Loss 1.627 - Mean Q Value 42.889 - Time Delta 43.062 - Time 2023-11-03T16:12:12


 19%|█▊        | 7460/40000 [5:52:47<33:40:02,  3.72s/it]

Episode 7460 - Step 1469027 - Epsilon 0.6926317571689514 - Mean Reward 694.03 - Mean Length 197.36 - Mean Loss 1.602 - Mean Q Value 42.796 - Time Delta 62.175 - Time 2023-11-03T16:13:14


 19%|█▊        | 7480/40000 [5:53:30<22:04:37,  2.44s/it]

Episode 7480 - Step 1471613 - Epsilon 0.6921841153977177 - Mean Reward 664.97 - Mean Length 186.11 - Mean Loss 1.614 - Mean Q Value 42.716 - Time Delta 37.891 - Time 2023-11-03T16:13:52


 19%|█▉        | 7500/40000 [5:54:24<17:34:26,  1.95s/it]

Episode 7500 - Step 1475267 - Epsilon 0.6915520938490056 - Mean Reward 677.03 - Mean Length 188.52 - Mean Loss 1.627 - Mean Q Value 42.714 - Time Delta 53.911 - Time 2023-11-03T16:14:46


 19%|█▉        | 7501/40000 [5:54:28<22:08:34,  2.45s/it]

MarioNet saved to checkpoints2/mario_net_2.chkpt at step 1475267


 19%|█▉        | 7520/40000 [5:54:58<12:24:14,  1.37s/it]

Episode 7520 - Step 1477451 - Epsilon 0.6911746094211522 - Mean Reward 621.66 - Mean Length 156.34 - Mean Loss 1.624 - Mean Q Value 42.676 - Time Delta 32.665 - Time 2023-11-03T16:15:19


 19%|█▉        | 7540/40000 [5:55:46<22:16:04,  2.47s/it]

Episode 7540 - Step 1481004 - Epsilon 0.6905609460811181 - Mean Reward 647.42 - Mean Length 162.63 - Mean Loss 1.591 - Mean Q Value 42.786 - Time Delta 51.776 - Time 2023-11-03T16:16:10


 19%|█▉        | 7560/40000 [5:56:40<28:16:27,  3.14s/it]

Episode 7560 - Step 1484412 - Epsilon 0.6899728386508024 - Mean Reward 648.93 - Mean Length 153.85 - Mean Loss 1.597 - Mean Q Value 42.889 - Time Delta 50.152 - Time 2023-11-03T16:17:01


 19%|█▉        | 7580/40000 [5:57:30<17:50:18,  1.98s/it]

Episode 7580 - Step 1487942 - Epsilon 0.6893642061431215 - Mean Reward 666.81 - Mean Length 163.29 - Mean Loss 1.585 - Mean Q Value 42.951 - Time Delta 51.666 - Time 2023-11-03T16:17:52


 19%|█▉        | 7600/40000 [5:58:19<16:54:26,  1.88s/it]

Episode 7600 - Step 1491313 - Epsilon 0.6887834891197322 - Mean Reward 643.54 - Mean Length 160.46 - Mean Loss 1.586 - Mean Q Value 42.858 - Time Delta 49.335 - Time 2023-11-03T16:18:42


 19%|█▉        | 7601/40000 [5:58:23<24:13:17,  2.69s/it]

MarioNet saved to checkpoints2/mario_net_2.chkpt at step 1491313


 19%|█▉        | 7620/40000 [5:59:15<24:32:57,  2.73s/it]

Episode 7620 - Step 1494913 - Epsilon 0.6881638627756632 - Mean Reward 689.33 - Mean Length 174.62 - Mean Loss 1.577 - Mean Q Value 42.822 - Time Delta 52.843 - Time 2023-11-03T16:19:34


 19%|█▉        | 7640/40000 [5:59:58<24:19:44,  2.71s/it]

Episode 7640 - Step 1497990 - Epsilon 0.6876346962147868 - Mean Reward 685.91 - Mean Length 169.86 - Mean Loss 1.591 - Mean Q Value 42.68 - Time Delta 44.52 - Time 2023-11-03T16:20:19


 19%|█▉        | 7649/40000 [6:00:23<21:41:21,  2.41s/it]

MarioNet saved to checkpoints2/mario_net_3.chkpt at step 1500000


 19%|█▉        | 7660/40000 [6:00:55<22:25:15,  2.50s/it]

Episode 7660 - Step 1501826 - Epsilon 0.6869755705599028 - Mean Reward 699.9 - Mean Length 174.14 - Mean Loss 1.582 - Mean Q Value 42.547 - Time Delta 55.892 - Time 2023-11-03T16:21:15


 19%|█▉        | 7680/40000 [6:01:47<24:25:25,  2.72s/it]

Episode 7680 - Step 1505709 - Epsilon 0.6863090125243215 - Mean Reward 697.72 - Mean Length 177.67 - Mean Loss 1.571 - Mean Q Value 42.457 - Time Delta 57.071 - Time 2023-11-03T16:22:12


 19%|█▉        | 7700/40000 [6:02:50<27:37:07,  3.08s/it]

Episode 7700 - Step 1509740 - Epsilon 0.6856177329074671 - Mean Reward 731.67 - Mean Length 184.27 - Mean Loss 1.549 - Mean Q Value 42.415 - Time Delta 58.166 - Time 2023-11-03T16:23:10


 19%|█▉        | 7701/40000 [6:02:52<24:37:41,  2.75s/it]

MarioNet saved to checkpoints2/mario_net_3.chkpt at step 1509740


 19%|█▉        | 7720/40000 [6:03:41<29:17:23,  3.27s/it]

Episode 7720 - Step 1513197 - Epsilon 0.6850254436875282 - Mean Reward 737.86 - Mean Length 182.84 - Mean Loss 1.573 - Mean Q Value 42.312 - Time Delta 50.196 - Time 2023-11-03T16:24:00


 19%|█▉        | 7740/40000 [6:04:29<25:48:12,  2.88s/it]

Episode 7740 - Step 1516657 - Epsilon 0.6844331528073542 - Mean Reward 737.42 - Mean Length 186.67 - Mean Loss 1.568 - Mean Q Value 42.314 - Time Delta 50.657 - Time 2023-11-03T16:24:51


 19%|█▉        | 7760/40000 [6:05:29<39:06:21,  4.37s/it]

Episode 7760 - Step 1520811 - Epsilon 0.6837227378360818 - Mean Reward 722.05 - Mean Length 189.85 - Mean Loss 1.566 - Mean Q Value 42.244 - Time Delta 59.595 - Time 2023-11-03T16:25:51


 19%|█▉        | 7780/40000 [6:06:21<12:00:43,  1.34s/it]

Episode 7780 - Step 1524241 - Epsilon 0.6831366968160476 - Mean Reward 727.36 - Mean Length 185.32 - Mean Loss 1.565 - Mean Q Value 42.126 - Time Delta 49.36 - Time 2023-11-03T16:26:40


 20%|█▉        | 7800/40000 [6:07:00<17:00:58,  1.90s/it]

Episode 7800 - Step 1526967 - Epsilon 0.6826712977016866 - Mean Reward 691.27 - Mean Length 172.27 - Mean Loss 1.586 - Mean Q Value 42.125 - Time Delta 39.657 - Time 2023-11-03T16:27:20


 20%|█▉        | 7801/40000 [6:07:01<14:46:22,  1.65s/it]

MarioNet saved to checkpoints2/mario_net_3.chkpt at step 1526967


 20%|█▉        | 7820/40000 [6:07:58<32:58:13,  3.69s/it]

Episode 7820 - Step 1530942 - Epsilon 0.6819932299854331 - Mean Reward 684.33 - Mean Length 177.45 - Mean Loss 1.562 - Mean Q Value 42.133 - Time Delta 58.71 - Time 2023-11-03T16:28:18


 20%|█▉        | 7840/40000 [6:08:52<25:10:12,  2.82s/it]

Episode 7840 - Step 1534631 - Epsilon 0.6813645515942897 - Mean Reward 694.22 - Mean Length 179.74 - Mean Loss 1.567 - Mean Q Value 42.104 - Time Delta 54.084 - Time 2023-11-03T16:29:12


 20%|█▉        | 7860/40000 [6:09:40<18:34:50,  2.08s/it]

Episode 7860 - Step 1537928 - Epsilon 0.6808031681843075 - Mean Reward 694.26 - Mean Length 171.17 - Mean Loss 1.56 - Mean Q Value 42.171 - Time Delta 48.132 - Time 2023-11-03T16:30:00


 20%|█▉        | 7880/40000 [6:10:23<26:18:31,  2.95s/it]

Episode 7880 - Step 1540930 - Epsilon 0.680292417026012 - Mean Reward 684.66 - Mean Length 166.89 - Mean Loss 1.597 - Mean Q Value 42.296 - Time Delta 43.371 - Time 2023-11-03T16:30:44


 20%|█▉        | 7900/40000 [6:11:10<23:32:44,  2.64s/it]

Episode 7900 - Step 1544360 - Epsilon 0.6797093162456325 - Mean Reward 706.19 - Mean Length 173.93 - Mean Loss 1.585 - Mean Q Value 42.37 - Time Delta 49.71 - Time 2023-11-03T16:31:34


 20%|█▉        | 7901/40000 [6:11:15<31:01:33,  3.48s/it]

MarioNet saved to checkpoints2/mario_net_3.chkpt at step 1544360


 20%|█▉        | 7920/40000 [6:12:09<29:31:11,  3.31s/it]

Episode 7920 - Step 1548258 - Epsilon 0.6790472620723903 - Mean Reward 718.04 - Mean Length 173.16 - Mean Loss 1.588 - Mean Q Value 42.431 - Time Delta 56.758 - Time 2023-11-03T16:32:30


 20%|█▉        | 7940/40000 [6:13:08<35:11:12,  3.95s/it]

Episode 7940 - Step 1552202 - Epsilon 0.6783780513633392 - Mean Reward 714.07 - Mean Length 175.71 - Mean Loss 1.588 - Mean Q Value 42.538 - Time Delta 57.392 - Time 2023-11-03T16:33:28


 20%|█▉        | 7960/40000 [6:13:55<24:14:18,  2.72s/it]

Episode 7960 - Step 1555547 - Epsilon 0.6778109947805068 - Mean Reward 713.16 - Mean Length 176.19 - Mean Loss 1.594 - Mean Q Value 42.555 - Time Delta 48.043 - Time 2023-11-03T16:34:16


 20%|█▉        | 7980/40000 [6:15:04<22:20:23,  2.51s/it]

Episode 7980 - Step 1560366 - Epsilon 0.6769948935800868 - Mean Reward 732.69 - Mean Length 194.36 - Mean Loss 1.569 - Mean Q Value 42.581 - Time Delta 68.875 - Time 2023-11-03T16:35:25


 20%|██        | 8000/40000 [6:15:50<17:54:37,  2.01s/it]

Episode 8000 - Step 1563545 - Epsilon 0.6764570655688256 - Mean Reward 736.27 - Mean Length 191.85 - Mean Loss 1.59 - Mean Q Value 42.65 - Time Delta 45.54 - Time 2023-11-03T16:36:10


 20%|██        | 8001/40000 [6:15:52<17:53:42,  2.01s/it]

MarioNet saved to checkpoints2/mario_net_3.chkpt at step 1563545


 20%|██        | 8020/40000 [6:16:29<18:53:45,  2.13s/it]

Episode 8020 - Step 1566384 - Epsilon 0.6759771204471493 - Mean Reward 708.63 - Mean Length 181.26 - Mean Loss 1.615 - Mean Q Value 42.77 - Time Delta 41.189 - Time 2023-11-03T16:36:51


 20%|██        | 8040/40000 [6:17:17<31:49:28,  3.58s/it]

Episode 8040 - Step 1569871 - Epsilon 0.6753880940978509 - Mean Reward 719.01 - Mean Length 176.69 - Mean Loss 1.596 - Mean Q Value 42.809 - Time Delta 50.858 - Time 2023-11-03T16:37:42


 20%|██        | 8060/40000 [6:18:22<17:34:42,  1.98s/it]

Episode 8060 - Step 1573901 - Epsilon 0.6747079831713505 - Mean Reward 735.37 - Mean Length 183.54 - Mean Loss 1.622 - Mean Q Value 42.991 - Time Delta 58.772 - Time 2023-11-03T16:38:41


 20%|██        | 8080/40000 [6:19:05<22:05:59,  2.49s/it]

Episode 8080 - Step 1577005 - Epsilon 0.6741846128048791 - Mean Reward 718.32 - Mean Length 166.39 - Mean Loss 1.634 - Mean Q Value 43.167 - Time Delta 44.698 - Time 2023-11-03T16:39:26


 20%|██        | 8100/40000 [6:20:00<24:33:04,  2.77s/it]

Episode 8100 - Step 1580670 - Epsilon 0.6735671739834852 - Mean Reward 724.75 - Mean Length 171.25 - Mean Loss 1.634 - Mean Q Value 43.259 - Time Delta 54.096 - Time 2023-11-03T16:40:20


 20%|██        | 8101/40000 [6:20:02<20:10:50,  2.28s/it]

MarioNet saved to checkpoints2/mario_net_3.chkpt at step 1580670


 20%|██        | 8120/40000 [6:20:50<16:00:11,  1.81s/it]

Episode 8120 - Step 1584025 - Epsilon 0.6730024563072237 - Mean Reward 746.0 - Mean Length 176.41 - Mean Loss 1.635 - Mean Q Value 43.296 - Time Delta 49.092 - Time 2023-11-03T16:41:09


 20%|██        | 8140/40000 [6:21:36<28:29:57,  3.22s/it]

Episode 8140 - Step 1587227 - Epsilon 0.6724639333461321 - Mean Reward 709.91 - Mean Length 173.56 - Mean Loss 1.668 - Mean Q Value 43.322 - Time Delta 46.858 - Time 2023-11-03T16:41:56


 20%|██        | 8160/40000 [6:22:16<21:35:00,  2.44s/it]

Episode 8160 - Step 1590014 - Epsilon 0.6719955572314378 - Mean Reward 678.98 - Mean Length 161.13 - Mean Loss 1.655 - Mean Q Value 43.347 - Time Delta 40.69 - Time 2023-11-03T16:42:36


 20%|██        | 8180/40000 [6:23:14<30:24:55,  3.44s/it]

Episode 8180 - Step 1594138 - Epsilon 0.6713030867551493 - Mean Reward 695.74 - Mean Length 171.33 - Mean Loss 1.666 - Mean Q Value 43.325 - Time Delta 60.031 - Time 2023-11-03T16:43:36


 20%|██        | 8200/40000 [6:24:09<25:07:31,  2.84s/it]

Episode 8200 - Step 1597791 - Epsilon 0.6706902989912873 - Mean Reward 693.49 - Mean Length 171.21 - Mean Loss 1.671 - Mean Q Value 43.307 - Time Delta 53.44 - Time 2023-11-03T16:44:30


 21%|██        | 8201/40000 [6:24:12<25:08:27,  2.85s/it]

MarioNet saved to checkpoints2/mario_net_3.chkpt at step 1597791


 21%|██        | 8220/40000 [6:25:03<33:54:58,  3.84s/it]

Episode 8220 - Step 1601619 - Epsilon 0.6700487553224777 - Mean Reward 693.86 - Mean Length 175.94 - Mean Loss 1.67 - Mean Q Value 43.259 - Time Delta 56.168 - Time 2023-11-03T16:45:26


 21%|██        | 8240/40000 [6:26:26<35:34:16,  4.03s/it]

Episode 8240 - Step 1607359 - Epsilon 0.6690879247991336 - Mean Reward 764.85 - Mean Length 201.32 - Mean Loss 1.678 - Mean Q Value 43.195 - Time Delta 83.998 - Time 2023-11-03T16:46:50


 21%|██        | 8260/40000 [6:27:30<21:56:52,  2.49s/it]

Episode 8260 - Step 1611462 - Epsilon 0.6684019596489521 - Mean Reward 789.2 - Mean Length 214.48 - Mean Loss 1.701 - Mean Q Value 43.0 - Time Delta 59.458 - Time 2023-11-03T16:47:50


 21%|██        | 8280/40000 [6:28:18<18:50:12,  2.14s/it]

Episode 8280 - Step 1615116 - Epsilon 0.6677916531823569 - Mean Reward 774.01 - Mean Length 209.78 - Mean Loss 1.716 - Mean Q Value 42.903 - Time Delta 52.668 - Time 2023-11-03T16:48:42


 21%|██        | 8300/40000 [6:29:27<23:29:43,  2.67s/it]

Episode 8300 - Step 1619793 - Epsilon 0.6670112940006109 - Mean Reward 778.04 - Mean Length 220.02 - Mean Loss 1.718 - Mean Q Value 42.722 - Time Delta 67.053 - Time 2023-11-03T16:49:49


 21%|██        | 8301/40000 [6:29:31<27:12:00,  3.09s/it]

MarioNet saved to checkpoints2/mario_net_3.chkpt at step 1619793


 21%|██        | 8320/40000 [6:30:28<27:40:42,  3.15s/it]

Episode 8320 - Step 1623849 - Epsilon 0.6663352872572395 - Mean Reward 763.58 - Mean Length 222.3 - Mean Loss 1.75 - Mean Q Value 42.692 - Time Delta 58.54 - Time 2023-11-03T16:50:48


 21%|██        | 8340/40000 [6:31:27<15:59:53,  1.82s/it]

Episode 8340 - Step 1627931 - Epsilon 0.6656556388611227 - Mean Reward 708.94 - Mean Length 205.72 - Mean Loss 1.762 - Mean Q Value 42.59 - Time Delta 59.752 - Time 2023-11-03T16:51:48


 21%|██        | 8360/40000 [6:32:18<34:31:07,  3.93s/it]

Episode 8360 - Step 1631364 - Epsilon 0.6650845849261819 - Mean Reward 684.6 - Mean Length 199.02 - Mean Loss 1.775 - Mean Q Value 42.49 - Time Delta 49.997 - Time 2023-11-03T16:52:38


 21%|██        | 8380/40000 [6:33:02<20:28:53,  2.33s/it]

Episode 8380 - Step 1634466 - Epsilon 0.6645690117054084 - Mean Reward 678.73 - Mean Length 193.5 - Mean Loss 1.805 - Mean Q Value 42.459 - Time Delta 44.914 - Time 2023-11-03T16:53:22


 21%|██        | 8400/40000 [6:33:58<21:09:43,  2.41s/it]

Episode 8400 - Step 1638288 - Epsilon 0.6639343192079298 - Mean Reward 679.81 - Mean Length 184.95 - Mean Loss 1.831 - Mean Q Value 42.521 - Time Delta 55.176 - Time 2023-11-03T16:54:18


 21%|██        | 8401/40000 [6:33:59<19:36:33,  2.23s/it]

MarioNet saved to checkpoints2/mario_net_3.chkpt at step 1638288


 21%|██        | 8420/40000 [6:34:45<24:01:15,  2.74s/it]

Episode 8420 - Step 1641547 - Epsilon 0.6633935989595107 - Mean Reward 687.37 - Mean Length 176.98 - Mean Loss 1.835 - Mean Q Value 42.45 - Time Delta 47.679 - Time 2023-11-03T16:55:05


 21%|██        | 8440/40000 [6:35:44<34:13:54,  3.90s/it]

Episode 8440 - Step 1645580 - Epsilon 0.6627250693588174 - Mean Reward 685.45 - Mean Length 176.49 - Mean Loss 1.841 - Mean Q Value 42.391 - Time Delta 59.083 - Time 2023-11-03T16:56:04


 21%|██        | 8460/40000 [6:36:38<16:58:31,  1.94s/it]

Episode 8460 - Step 1649545 - Epsilon 0.6620684685331928 - Mean Reward 687.73 - Mean Length 181.81 - Mean Loss 1.848 - Mean Q Value 42.41 - Time Delta 57.912 - Time 2023-11-03T16:57:02


 21%|██        | 8480/40000 [6:37:45<27:34:19,  3.15s/it]

Episode 8480 - Step 1653772 - Epsilon 0.6613691971341644 - Mean Reward 709.83 - Mean Length 193.06 - Mean Loss 1.855 - Mean Q Value 42.148 - Time Delta 61.82 - Time 2023-11-03T16:58:04


 21%|██▏       | 8500/40000 [6:38:44<27:12:16,  3.11s/it]

Episode 8500 - Step 1657987 - Epsilon 0.6606726463151855 - Mean Reward 712.74 - Mean Length 196.99 - Mean Loss 1.87 - Mean Q Value 41.968 - Time Delta 61.887 - Time 2023-11-03T16:59:06


 21%|██▏       | 8501/40000 [6:38:48<27:40:37,  3.16s/it]

MarioNet saved to checkpoints2/mario_net_3.chkpt at step 1657987


 21%|██▏       | 8520/40000 [6:39:45<25:34:43,  2.93s/it]

Episode 8520 - Step 1662222 - Epsilon 0.6599735292238277 - Mean Reward 730.03 - Mean Length 206.75 - Mean Loss 1.854 - Mean Q Value 41.735 - Time Delta 61.724 - Time 2023-11-03T17:00:08


 21%|██▏       | 8540/40000 [6:40:34<10:42:19,  1.23s/it]

Episode 8540 - Step 1665342 - Epsilon 0.6594589505184238 - Mean Reward 724.45 - Mean Length 197.62 - Mean Loss 1.864 - Mean Q Value 41.59 - Time Delta 45.957 - Time 2023-11-03T17:00:54


 21%|██▏       | 8560/40000 [6:41:34<47:47:56,  5.47s/it]

Episode 8560 - Step 1669651 - Epsilon 0.6587489307781716 - Mean Reward 745.13 - Mean Length 201.06 - Mean Loss 1.861 - Mean Q Value 41.365 - Time Delta 62.553 - Time 2023-11-03T17:01:56


 21%|██▏       | 8580/40000 [6:42:27<18:37:13,  2.13s/it]

Episode 8580 - Step 1673118 - Epsilon 0.6581782074439553 - Mean Reward 739.6 - Mean Length 193.46 - Mean Loss 1.85 - Mean Q Value 41.265 - Time Delta 50.091 - Time 2023-11-03T17:02:46


 22%|██▏       | 8600/40000 [6:43:36<36:23:14,  4.17s/it]

Episode 8600 - Step 1678305 - Epsilon 0.6573252678909207 - Mean Reward 747.67 - Mean Length 203.18 - Mean Loss 1.832 - Mean Q Value 41.096 - Time Delta 74.951 - Time 2023-11-03T17:04:01


 22%|██▏       | 8601/40000 [6:43:43<42:47:02,  4.91s/it]

MarioNet saved to checkpoints2/mario_net_3.chkpt at step 1678305


 22%|██▏       | 8620/40000 [6:44:52<38:52:44,  4.46s/it]

Episode 8620 - Step 1683357 - Epsilon 0.6564955900256075 - Mean Reward 755.11 - Mean Length 211.35 - Mean Loss 1.813 - Mean Q Value 40.805 - Time Delta 73.692 - Time 2023-11-03T17:05:15


 22%|██▏       | 8640/40000 [6:45:46<15:20:23,  1.76s/it]

Episode 8640 - Step 1686854 - Epsilon 0.6559218994952496 - Mean Reward 753.63 - Mean Length 215.12 - Mean Loss 1.785 - Mean Q Value 40.438 - Time Delta 50.78 - Time 2023-11-03T17:06:06


 22%|██▏       | 8660/40000 [6:46:39<28:04:47,  3.23s/it]

Episode 8660 - Step 1690812 - Epsilon 0.6552731856986023 - Mean Reward 749.89 - Mean Length 211.61 - Mean Loss 1.76 - Mean Q Value 40.134 - Time Delta 56.904 - Time 2023-11-03T17:07:03


 22%|██▏       | 8680/40000 [6:47:29<25:29:51,  2.93s/it]

Episode 8680 - Step 1694129 - Epsilon 0.6547300255802798 - Mean Reward 749.56 - Mean Length 210.11 - Mean Loss 1.743 - Mean Q Value 39.742 - Time Delta 48.029 - Time 2023-11-03T17:07:51


 22%|██▏       | 8700/40000 [6:48:25<15:25:49,  1.77s/it]

Episode 8700 - Step 1698501 - Epsilon 0.6540147965167047 - Mean Reward 718.96 - Mean Length 201.96 - Mean Loss 1.726 - Mean Q Value 39.405 - Time Delta 63.174 - Time 2023-11-03T17:08:54


 22%|██▏       | 8701/40000 [6:48:36<38:00:52,  4.37s/it]

MarioNet saved to checkpoints2/mario_net_3.chkpt at step 1698501


 22%|██▏       | 8720/40000 [6:49:30<28:47:19,  3.31s/it]

Episode 8720 - Step 1702815 - Epsilon 0.6533098216964435 - Mean Reward 716.86 - Mean Length 194.58 - Mean Loss 1.732 - Mean Q Value 39.272 - Time Delta 62.343 - Time 2023-11-03T17:09:56


 22%|██▏       | 8740/40000 [6:50:14<12:59:15,  1.50s/it]

Episode 8740 - Step 1705566 - Epsilon 0.6528606622827612 - Mean Reward 711.18 - Mean Length 187.12 - Mean Loss 1.742 - Mean Q Value 39.317 - Time Delta 40.597 - Time 2023-11-03T17:10:37


 22%|██▏       | 8760/40000 [6:51:25<28:28:54,  3.28s/it]

Episode 8760 - Step 1710190 - Epsilon 0.6521063913153897 - Mean Reward 708.28 - Mean Length 193.78 - Mean Loss 1.776 - Mean Q Value 39.313 - Time Delta 67.773 - Time 2023-11-03T17:11:45


 22%|██▏       | 8780/40000 [6:52:14<19:53:32,  2.29s/it]

Episode 8780 - Step 1713761 - Epsilon 0.6515244830496669 - Mean Reward 715.62 - Mean Length 196.32 - Mean Loss 1.781 - Mean Q Value 39.375 - Time Delta 51.946 - Time 2023-11-03T17:12:37


 22%|██▏       | 8800/40000 [6:53:11<17:01:09,  1.96s/it]

Episode 8800 - Step 1717556 - Epsilon 0.6509066422536295 - Mean Reward 750.92 - Mean Length 190.55 - Mean Loss 1.8 - Mean Q Value 39.425 - Time Delta 55.61 - Time 2023-11-03T17:13:32


 22%|██▏       | 8801/40000 [6:53:14<20:07:07,  2.32s/it]

MarioNet saved to checkpoints2/mario_net_3.chkpt at step 1717556


 22%|██▏       | 8820/40000 [6:53:59<18:49:09,  2.17s/it]

Episode 8820 - Step 1720812 - Epsilon 0.6503770197661548 - Mean Reward 731.25 - Mean Length 179.97 - Mean Loss 1.828 - Mean Q Value 39.537 - Time Delta 47.42 - Time 2023-11-03T17:14:20


 22%|██▏       | 8840/40000 [6:54:49<21:45:27,  2.51s/it]

Episode 8840 - Step 1724244 - Epsilon 0.649819235537049 - Mean Reward 741.56 - Mean Length 186.78 - Mean Loss 1.842 - Mean Q Value 39.594 - Time Delta 49.872 - Time 2023-11-03T17:15:09


 22%|██▏       | 8860/40000 [6:55:41<23:41:57,  2.74s/it]

Episode 8860 - Step 1727893 - Epsilon 0.6492267081729166 - Mean Reward 748.74 - Mean Length 177.03 - Mean Loss 1.805 - Mean Q Value 39.732 - Time Delta 53.187 - Time 2023-11-03T17:16:03


 22%|██▏       | 8880/40000 [6:56:43<40:50:34,  4.72s/it]

Episode 8880 - Step 1732655 - Epsilon 0.6484542635690566 - Mean Reward 767.76 - Mean Length 188.94 - Mean Loss 1.784 - Mean Q Value 39.894 - Time Delta 68.451 - Time 2023-11-03T17:17:11


 22%|██▏       | 8900/40000 [6:57:48<45:43:14,  5.29s/it]

Episode 8900 - Step 1737144 - Epsilon 0.6477269438743478 - Mean Reward 740.56 - Mean Length 195.88 - Mean Loss 1.773 - Mean Q Value 40.066 - Time Delta 64.675 - Time 2023-11-03T17:18:16


 22%|██▏       | 8901/40000 [6:57:57<55:40:29,  6.44s/it]

MarioNet saved to checkpoints2/mario_net_3.chkpt at step 1737144


 22%|██▏       | 8920/40000 [6:58:43<20:40:50,  2.40s/it]

Episode 8920 - Step 1740485 - Epsilon 0.647186155754881 - Mean Reward 724.6 - Mean Length 196.73 - Mean Loss 1.776 - Mean Q Value 40.007 - Time Delta 48.551 - Time 2023-11-03T17:19:04


 22%|██▏       | 8940/40000 [6:59:46<29:55:16,  3.47s/it]

Episode 8940 - Step 1745124 - Epsilon 0.6464360165878957 - Mean Reward 754.93 - Mean Length 208.8 - Mean Loss 1.746 - Mean Q Value 39.827 - Time Delta 66.624 - Time 2023-11-03T17:20:11


 22%|██▏       | 8960/40000 [7:00:37<21:18:03,  2.47s/it]

Episode 8960 - Step 1748491 - Epsilon 0.6458921079525315 - Mean Reward 749.46 - Mean Length 205.98 - Mean Loss 1.751 - Mean Q Value 39.679 - Time Delta 48.458 - Time 2023-11-03T17:20:59


 22%|██▏       | 8980/40000 [7:01:33<22:02:42,  2.56s/it]

Episode 8980 - Step 1752503 - Epsilon 0.6452446028653108 - Mean Reward 733.95 - Mean Length 198.48 - Mean Loss 1.722 - Mean Q Value 39.545 - Time Delta 57.738 - Time 2023-11-03T17:21:57


 22%|██▎       | 9000/40000 [7:02:44<38:57:06,  4.52s/it]

Episode 9000 - Step 1757585 - Epsilon 0.644425340041762 - Mean Reward 766.24 - Mean Length 204.41 - Mean Loss 1.665 - Mean Q Value 39.367 - Time Delta 73.232 - Time 2023-11-03T17:23:10


 23%|██▎       | 9001/40000 [7:02:52<47:53:58,  5.56s/it]

MarioNet saved to checkpoints2/mario_net_3.chkpt at step 1757585


 23%|██▎       | 9020/40000 [7:03:41<18:15:32,  2.12s/it]

Episode 9020 - Step 1761030 - Epsilon 0.6438705675814104 - Mean Reward 778.21 - Mean Length 205.45 - Mean Loss 1.609 - Mean Q Value 39.309 - Time Delta 49.558 - Time 2023-11-03T17:24:00


 23%|██▎       | 9040/40000 [7:04:35<39:07:43,  4.55s/it]

Episode 9040 - Step 1765096 - Epsilon 0.6432164056017567 - Mean Reward 768.57 - Mean Length 199.72 - Mean Loss 1.597 - Mean Q Value 39.344 - Time Delta 58.349 - Time 2023-11-03T17:24:58


 23%|██▎       | 9060/40000 [7:05:31<17:48:20,  2.07s/it]

Episode 9060 - Step 1768888 - Epsilon 0.6426069253122764 - Mean Reward 764.07 - Mean Length 203.97 - Mean Loss 1.57 - Mean Q Value 39.308 - Time Delta 54.583 - Time 2023-11-03T17:25:53


 23%|██▎       | 9080/40000 [7:06:27<20:43:58,  2.41s/it]

Episode 9080 - Step 1772536 - Epsilon 0.642021134884122 - Mean Reward 750.39 - Mean Length 200.33 - Mean Loss 1.578 - Mean Q Value 39.203 - Time Delta 53.346 - Time 2023-11-03T17:26:46


 23%|██▎       | 9100/40000 [7:07:20<16:31:42,  1.93s/it]

Episode 9100 - Step 1776453 - Epsilon 0.6413927433355774 - Mean Reward 726.88 - Mean Length 188.68 - Mean Loss 1.596 - Mean Q Value 39.174 - Time Delta 57.434 - Time 2023-11-03T17:27:44


 23%|██▎       | 9101/40000 [7:07:25<25:11:20,  2.93s/it]

MarioNet saved to checkpoints2/mario_net_3.chkpt at step 1776453


 23%|██▎       | 9120/40000 [7:08:28<28:31:20,  3.33s/it]

Episode 9120 - Step 1780877 - Epsilon 0.6406837550155401 - Mean Reward 731.45 - Mean Length 198.47 - Mean Loss 1.601 - Mean Q Value 39.256 - Time Delta 63.773 - Time 2023-11-03T17:28:47


 23%|██▎       | 9140/40000 [7:09:21<19:01:07,  2.22s/it]

Episode 9140 - Step 1784609 - Epsilon 0.6400862757647517 - Mean Reward 734.19 - Mean Length 195.13 - Mean Loss 1.573 - Mean Q Value 39.344 - Time Delta 54.371 - Time 2023-11-03T17:29:42


 23%|██▎       | 9160/40000 [7:10:21<22:27:59,  2.62s/it]

Episode 9160 - Step 1788677 - Epsilon 0.639435638845829 - Mean Reward 741.11 - Mean Length 197.89 - Mean Loss 1.57 - Mean Q Value 39.461 - Time Delta 58.607 - Time 2023-11-03T17:30:40


 23%|██▎       | 9180/40000 [7:11:23<33:18:13,  3.89s/it]

Episode 9180 - Step 1792961 - Epsilon 0.6387511697895769 - Mean Reward 741.81 - Mean Length 204.25 - Mean Loss 1.574 - Mean Q Value 39.547 - Time Delta 61.528 - Time 2023-11-03T17:31:42


 23%|██▎       | 9200/40000 [7:12:10<14:21:42,  1.68s/it]

Episode 9200 - Step 1796346 - Epsilon 0.6382108551973847 - Mean Reward 724.89 - Mean Length 198.93 - Mean Loss 1.593 - Mean Q Value 39.575 - Time Delta 48.93 - Time 2023-11-03T17:32:31


 23%|██▎       | 9201/40000 [7:12:13<17:44:37,  2.07s/it]

MarioNet saved to checkpoints2/mario_net_3.chkpt at step 1796346


 23%|██▎       | 9220/40000 [7:13:03<26:07:42,  3.06s/it]

Episode 9220 - Step 1799896 - Epsilon 0.6376446942636256 - Mean Reward 708.01 - Mean Length 190.19 - Mean Loss 1.585 - Mean Q Value 39.532 - Time Delta 51.407 - Time 2023-11-03T17:33:22


 23%|██▎       | 9240/40000 [7:13:56<24:53:25,  2.91s/it]

Episode 9240 - Step 1803777 - Epsilon 0.6370263194592178 - Mean Reward 701.13 - Mean Length 191.68 - Mean Loss 1.602 - Mean Q Value 39.494 - Time Delta 56.209 - Time 2023-11-03T17:34:18


 23%|██▎       | 9260/40000 [7:14:50<19:53:00,  2.33s/it]

Episode 9260 - Step 1807484 - Epsilon 0.6364362287193461 - Mean Reward 687.11 - Mean Length 188.07 - Mean Loss 1.584 - Mean Q Value 39.385 - Time Delta 53.081 - Time 2023-11-03T17:35:12


 23%|██▎       | 9280/40000 [7:15:35<21:26:30,  2.51s/it]

Episode 9280 - Step 1810535 - Epsilon 0.635950972013467 - Mean Reward 683.66 - Mean Length 175.74 - Mean Loss 1.566 - Mean Q Value 39.461 - Time Delta 43.685 - Time 2023-11-03T17:35:55


 23%|██▎       | 9300/40000 [7:16:22<20:38:03,  2.42s/it]

Episode 9300 - Step 1813969 - Epsilon 0.6354052423236906 - Mean Reward 701.25 - Mean Length 176.23 - Mean Loss 1.587 - Mean Q Value 39.553 - Time Delta 49.302 - Time 2023-11-03T17:36:45


 23%|██▎       | 9301/40000 [7:16:26<24:17:45,  2.85s/it]

MarioNet saved to checkpoints2/mario_net_3.chkpt at step 1813969


 23%|██▎       | 9320/40000 [7:17:21<19:58:45,  2.34s/it]

Episode 9320 - Step 1818029 - Epsilon 0.6347606331165803 - Mean Reward 732.44 - Mean Length 181.33 - Mean Loss 1.601 - Mean Q Value 39.687 - Time Delta 58.466 - Time 2023-11-03T17:37:43


 23%|██▎       | 9340/40000 [7:18:11<20:09:21,  2.37s/it]

Episode 9340 - Step 1821546 - Epsilon 0.6342027650487904 - Mean Reward 713.11 - Mean Length 177.69 - Mean Loss 1.62 - Mean Q Value 39.586 - Time Delta 51.023 - Time 2023-11-03T17:38:34


 23%|██▎       | 9360/40000 [7:19:24<27:40:43,  3.25s/it]

Episode 9360 - Step 1826365 - Epsilon 0.6334391692354296 - Mean Reward 735.33 - Mean Length 188.81 - Mean Loss 1.643 - Mean Q Value 39.51 - Time Delta 69.635 - Time 2023-11-03T17:39:44


 23%|██▎       | 9380/40000 [7:20:22<30:47:27,  3.62s/it]

Episode 9380 - Step 1830379 - Epsilon 0.6328038317834788 - Mean Reward 748.03 - Mean Length 198.44 - Mean Loss 1.653 - Mean Q Value 39.335 - Time Delta 58.686 - Time 2023-11-03T17:40:42


 24%|██▎       | 9400/40000 [7:21:18<21:04:59,  2.48s/it]

Episode 9400 - Step 1834270 - Epsilon 0.6321885710750593 - Mean Reward 737.86 - Mean Length 203.01 - Mean Loss 1.623 - Mean Q Value 39.208 - Time Delta 56.139 - Time 2023-11-03T17:41:38


 24%|██▎       | 9401/40000 [7:21:20<19:37:31,  2.31s/it]

MarioNet saved to checkpoints2/mario_net_3.chkpt at step 1834270


 24%|██▎       | 9420/40000 [7:22:33<28:32:17,  3.36s/it]

Episode 9420 - Step 1839337 - Epsilon 0.631388253110909 - Mean Reward 749.26 - Mean Length 213.08 - Mean Loss 1.59 - Mean Q Value 39.026 - Time Delta 73.922 - Time 2023-11-03T17:42:52


 24%|██▎       | 9440/40000 [7:23:41<24:08:36,  2.84s/it]

Episode 9440 - Step 1843970 - Epsilon 0.630657370929133 - Mean Reward 765.09 - Mean Length 224.24 - Mean Loss 1.605 - Mean Q Value 39.009 - Time Delta 67.726 - Time 2023-11-03T17:44:00


 24%|██▎       | 9460/40000 [7:24:32<29:24:44,  3.47s/it]

Episode 9460 - Step 1847495 - Epsilon 0.6301018488643455 - Mean Reward 771.84 - Mean Length 211.3 - Mean Loss 1.643 - Mean Q Value 39.047 - Time Delta 51.861 - Time 2023-11-03T17:44:52


 24%|██▎       | 9480/40000 [7:25:36<43:56:05,  5.18s/it]

Episode 9480 - Step 1851853 - Epsilon 0.6294157266470974 - Mean Reward 764.56 - Mean Length 214.74 - Mean Loss 1.684 - Mean Q Value 39.128 - Time Delta 63.594 - Time 2023-11-03T17:45:56


 24%|██▍       | 9500/40000 [7:26:21<24:23:26,  2.88s/it]

Episode 9500 - Step 1854975 - Epsilon 0.6289246592749698 - Mean Reward 765.74 - Mean Length 207.05 - Mean Loss 1.729 - Mean Q Value 39.165 - Time Delta 45.736 - Time 2023-11-03T17:46:41


 24%|██▍       | 9501/40000 [7:26:23<21:45:01,  2.57s/it]

MarioNet saved to checkpoints2/mario_net_3.chkpt at step 1854975


 24%|██▍       | 9520/40000 [7:27:04<23:54:50,  2.82s/it]

Episode 9520 - Step 1857877 - Epsilon 0.6284685398549136 - Mean Reward 730.22 - Mean Length 185.4 - Mean Loss 1.775 - Mean Q Value 39.201 - Time Delta 42.445 - Time 2023-11-03T17:47:24


 24%|██▍       | 9540/40000 [7:27:57<19:54:39,  2.35s/it]

Episode 9540 - Step 1861536 - Epsilon 0.6278939110470931 - Mean Reward 735.46 - Mean Length 175.66 - Mean Loss 1.785 - Mean Q Value 39.328 - Time Delta 52.722 - Time 2023-11-03T17:48:16


 24%|██▍       | 9560/40000 [7:29:02<18:18:20,  2.16s/it]

Episode 9560 - Step 1866292 - Epsilon 0.627147788751095 - Mean Reward 747.59 - Mean Length 187.97 - Mean Loss 1.781 - Mean Q Value 39.526 - Time Delta 68.232 - Time 2023-11-03T17:49:25


 24%|██▍       | 9580/40000 [7:30:00<15:34:46,  1.84s/it]

Episode 9580 - Step 1870220 - Epsilon 0.6265322318334261 - Mean Reward 728.18 - Mean Length 183.67 - Mean Loss 1.799 - Mean Q Value 39.623 - Time Delta 56.634 - Time 2023-11-03T17:50:21


 24%|██▍       | 9600/40000 [7:31:04<43:43:54,  5.18s/it]

Episode 9600 - Step 1874843 - Epsilon 0.6258085354025518 - Mean Reward 741.73 - Mean Length 198.68 - Mean Loss 1.809 - Mean Q Value 39.661 - Time Delta 66.792 - Time 2023-11-03T17:51:28


 24%|██▍       | 9601/40000 [7:31:10<44:39:17,  5.29s/it]

MarioNet saved to checkpoints2/mario_net_3.chkpt at step 1874843


 24%|██▍       | 9620/40000 [7:32:12<29:06:30,  3.45s/it]

Episode 9620 - Step 1879180 - Epsilon 0.6251303701299654 - Mean Reward 781.0 - Mean Length 213.03 - Mean Loss 1.853 - Mean Q Value 39.744 - Time Delta 63.803 - Time 2023-11-03T17:52:32


 24%|██▍       | 9640/40000 [7:33:06<25:42:39,  3.05s/it]

Episode 9640 - Step 1882867 - Epsilon 0.6245544216202704 - Mean Reward 796.1 - Mean Length 213.31 - Mean Loss 1.863 - Mean Q Value 39.856 - Time Delta 53.344 - Time 2023-11-03T17:53:25


 24%|██▍       | 9660/40000 [7:33:50<16:00:03,  1.90s/it]

Episode 9660 - Step 1885915 - Epsilon 0.6240786923673294 - Mean Reward 757.12 - Mean Length 196.23 - Mean Loss 1.896 - Mean Q Value 39.921 - Time Delta 44.397 - Time 2023-11-03T17:54:10


 24%|██▍       | 9680/40000 [7:34:31<21:41:35,  2.58s/it]

Episode 9680 - Step 1889029 - Epsilon 0.6235930361107719 - Mean Reward 773.25 - Mean Length 188.09 - Mean Loss 1.895 - Mean Q Value 40.104 - Time Delta 44.755 - Time 2023-11-03T17:54:54


 24%|██▍       | 9700/40000 [7:35:26<22:44:36,  2.70s/it]

Episode 9700 - Step 1892961 - Epsilon 0.6229803452664867 - Mean Reward 787.22 - Mean Length 181.18 - Mean Loss 1.909 - Mean Q Value 40.338 - Time Delta 56.836 - Time 2023-11-03T17:55:51


 24%|██▍       | 9701/40000 [7:35:33<32:53:55,  3.91s/it]

MarioNet saved to checkpoints2/mario_net_3.chkpt at step 1892961


 24%|██▍       | 9720/40000 [7:36:39<24:56:26,  2.97s/it]

Episode 9720 - Step 1897717 - Epsilon 0.6222400617291018 - Mean Reward 785.49 - Mean Length 185.37 - Mean Loss 1.904 - Mean Q Value 40.501 - Time Delta 68.093 - Time 2023-11-03T17:56:59


 24%|██▍       | 9740/40000 [7:37:33<39:53:28,  4.75s/it]

Episode 9740 - Step 1901543 - Epsilon 0.6216451735862222 - Mean Reward 742.33 - Mean Length 186.76 - Mean Loss 1.9 - Mean Q Value 40.65 - Time Delta 54.519 - Time 2023-11-03T17:57:54


 24%|██▍       | 9760/40000 [7:38:28<24:26:37,  2.91s/it]

Episode 9760 - Step 1905187 - Epsilon 0.621079112642159 - Mean Reward 749.7 - Mean Length 192.72 - Mean Loss 1.876 - Mean Q Value 40.664 - Time Delta 53.069 - Time 2023-11-03T17:58:47


 24%|██▍       | 9780/40000 [7:39:15<24:26:21,  2.91s/it]

Episode 9780 - Step 1908908 - Epsilon 0.6205016223721636 - Mean Reward 736.88 - Mean Length 198.79 - Mean Loss 1.852 - Mean Q Value 40.691 - Time Delta 53.97 - Time 2023-11-03T17:59:41


 24%|██▍       | 9800/40000 [7:40:23<22:42:29,  2.71s/it]

Episode 9800 - Step 1913359 - Epsilon 0.6198115431195601 - Mean Reward 737.39 - Mean Length 203.98 - Mean Loss 1.808 - Mean Q Value 40.593 - Time Delta 64.092 - Time 2023-11-03T18:00:45


 25%|██▍       | 9801/40000 [7:40:27<23:58:35,  2.86s/it]

MarioNet saved to checkpoints2/mario_net_3.chkpt at step 1913359


 25%|██▍       | 9820/40000 [7:41:28<28:21:06,  3.38s/it]

Episode 9820 - Step 1917921 - Epsilon 0.6191050509199779 - Mean Reward 717.48 - Mean Length 202.04 - Mean Loss 1.759 - Mean Q Value 40.491 - Time Delta 65.788 - Time 2023-11-03T18:01:51


 25%|██▍       | 9840/40000 [7:42:37<18:44:36,  2.24s/it]

Episode 9840 - Step 1922728 - Epsilon 0.6183614882097174 - Mean Reward 727.24 - Mean Length 211.85 - Mean Loss 1.712 - Mean Q Value 40.274 - Time Delta 68.998 - Time 2023-11-03T18:03:00


 25%|██▍       | 9860/40000 [7:43:24<24:31:56,  2.93s/it]

Episode 9860 - Step 1926415 - Epsilon 0.6177917760430987 - Mean Reward 735.44 - Mean Length 212.28 - Mean Loss 1.673 - Mean Q Value 40.153 - Time Delta 52.608 - Time 2023-11-03T18:03:52


 25%|██▍       | 9880/40000 [7:44:23<25:16:42,  3.02s/it]

Episode 9880 - Step 1929928 - Epsilon 0.6172494385367394 - Mean Reward 758.03 - Mean Length 210.2 - Mean Loss 1.625 - Mean Q Value 39.993 - Time Delta 50.039 - Time 2023-11-03T18:04:42


 25%|██▍       | 9900/40000 [7:45:05<17:34:19,  2.10s/it]

Episode 9900 - Step 1933128 - Epsilon 0.6167558363913246 - Mean Reward 717.87 - Mean Length 197.69 - Mean Loss 1.572 - Mean Q Value 39.914 - Time Delta 45.943 - Time 2023-11-03T18:05:28


 25%|██▍       | 9901/40000 [7:45:10<25:04:28,  3.00s/it]

MarioNet saved to checkpoints2/mario_net_3.chkpt at step 1933128


 25%|██▍       | 9920/40000 [7:45:54<19:56:29,  2.39s/it]

Episode 9920 - Step 1936209 - Epsilon 0.6162809630579901 - Mean Reward 713.32 - Mean Length 182.88 - Mean Loss 1.537 - Mean Q Value 39.971 - Time Delta 45.842 - Time 2023-11-03T18:06:14


 25%|██▍       | 9940/40000 [7:46:56<14:23:52,  1.72s/it]

Episode 9940 - Step 1940371 - Epsilon 0.6156400561253785 - Mean Reward 709.87 - Mean Length 176.43 - Mean Loss 1.507 - Mean Q Value 40.068 - Time Delta 60.95 - Time 2023-11-03T18:07:15


 25%|██▍       | 9960/40000 [7:47:38<13:21:39,  1.60s/it]

Episode 9960 - Step 1943438 - Epsilon 0.615168194976182 - Mean Reward 676.77 - Mean Length 170.23 - Mean Loss 1.478 - Mean Q Value 40.147 - Time Delta 44.833 - Time 2023-11-03T18:08:00


 25%|██▍       | 9980/40000 [7:48:19<13:31:39,  1.62s/it]

Episode 9980 - Step 1946114 - Epsilon 0.6147567850342306 - Mean Reward 639.02 - Mean Length 161.86 - Mean Loss 1.469 - Mean Q Value 40.157 - Time Delta 39.42 - Time 2023-11-03T18:08:39


 25%|██▌       | 10000/40000 [7:49:25<20:11:34,  2.42s/it]

Episode 10000 - Step 1951003 - Epsilon 0.6140058574638422 - Mean Reward 660.68 - Mean Length 178.75 - Mean Loss 1.466 - Mean Q Value 40.168 - Time Delta 70.709 - Time 2023-11-03T18:09:50


 25%|██▌       | 10001/40000 [7:49:32<30:28:02,  3.66s/it]

MarioNet saved to checkpoints2/mario_net_3.chkpt at step 1951003


 25%|██▌       | 10020/40000 [7:50:39<32:05:48,  3.85s/it]

Episode 10020 - Step 1956045 - Epsilon 0.6132323905636411 - Mean Reward 669.57 - Mean Length 198.36 - Mean Loss 1.459 - Mean Q Value 39.984 - Time Delta 73.036 - Time 2023-11-03T18:11:03


 25%|██▌       | 10040/40000 [7:52:00<23:44:10,  2.85s/it]

Episode 10040 - Step 1961428 - Epsilon 0.6124076880170227 - Mean Reward 690.22 - Mean Length 210.57 - Mean Loss 1.443 - Mean Q Value 39.565 - Time Delta 77.479 - Time 2023-11-03T18:12:21


 25%|██▌       | 10060/40000 [7:53:24<30:50:02,  3.71s/it]

Episode 10060 - Step 1967317 - Epsilon 0.6115067340630622 - Mean Reward 710.67 - Mean Length 238.79 - Mean Loss 1.419 - Mean Q Value 39.066 - Time Delta 84.895 - Time 2023-11-03T18:13:46


 25%|██▌       | 10080/40000 [7:54:21<32:10:24,  3.87s/it]

Episode 10080 - Step 1971166 - Epsilon 0.6109185946485679 - Mean Reward 738.88 - Mean Length 250.52 - Mean Loss 1.411 - Mean Q Value 38.595 - Time Delta 56.079 - Time 2023-11-03T18:14:42


 25%|██▌       | 10100/40000 [7:55:25<27:48:50,  3.35s/it]

Episode 10100 - Step 1975638 - Epsilon 0.6102359692330375 - Mean Reward 758.5 - Mean Length 246.35 - Mean Loss 1.399 - Mean Q Value 38.089 - Time Delta 64.662 - Time 2023-11-03T18:15:46


 25%|██▌       | 10101/40000 [7:55:28<26:51:07,  3.23s/it]

MarioNet saved to checkpoints2/mario_net_3.chkpt at step 1975638


 25%|██▌       | 10120/40000 [7:56:32<31:32:11,  3.80s/it]

Episode 10120 - Step 1980264 - Epsilon 0.6095306391820723 - Mean Reward 779.62 - Mean Length 242.19 - Mean Loss 1.381 - Mean Q Value 37.68 - Time Delta 66.599 - Time 2023-11-03T18:16:53


 25%|██▌       | 10140/40000 [7:57:14<11:18:09,  1.36s/it]

Episode 10140 - Step 1983316 - Epsilon 0.6090657446260304 - Mean Reward 755.72 - Mean Length 218.88 - Mean Loss 1.357 - Mean Q Value 37.551 - Time Delta 44.029 - Time 2023-11-03T18:17:37


 25%|██▌       | 10160/40000 [7:58:11<19:57:17,  2.41s/it]

Episode 10160 - Step 1987003 - Epsilon 0.6085045968644053 - Mean Reward 760.82 - Mean Length 196.86 - Mean Loss 1.351 - Mean Q Value 37.421 - Time Delta 53.216 - Time 2023-11-03T18:18:30


 25%|██▌       | 10180/40000 [7:59:01<27:11:29,  3.28s/it]

Episode 10180 - Step 1990522 - Epsilon 0.6079695002884704 - Mean Reward 759.37 - Mean Length 193.56 - Mean Loss 1.312 - Mean Q Value 37.317 - Time Delta 51.349 - Time 2023-11-03T18:19:21


 26%|██▌       | 10200/40000 [7:59:49<13:46:24,  1.66s/it]

Episode 10200 - Step 1993893 - Epsilon 0.6074573497657534 - Mean Reward 727.39 - Mean Length 182.55 - Mean Loss 1.295 - Mean Q Value 37.213 - Time Delta 49.338 - Time 2023-11-03T18:20:11


 26%|██▌       | 10201/40000 [7:59:53<19:14:35,  2.32s/it]

MarioNet saved to checkpoints2/mario_net_3.chkpt at step 1993893


 26%|██▌       | 10220/40000 [8:00:43<15:09:47,  1.83s/it]

Episode 10220 - Step 1997587 - Epsilon 0.6068966217890704 - Mean Reward 712.97 - Mean Length 173.23 - Mean Loss 1.29 - Mean Q Value 37.108 - Time Delta 53.85 - Time 2023-11-03T18:21:05


 26%|██▌       | 10229/40000 [8:01:16<30:39:44,  3.71s/it]

MarioNet saved to checkpoints2/mario_net_4.chkpt at step 2000000


 26%|██▌       | 10240/40000 [8:02:00<24:12:37,  2.93s/it]

Episode 10240 - Step 2003485 - Epsilon 0.6060024120265014 - Mean Reward 775.69 - Mean Length 201.69 - Mean Loss 1.302 - Mean Q Value 37.023 - Time Delta 85.082 - Time 2023-11-03T18:22:30


 26%|██▌       | 10260/40000 [8:03:05<27:29:44,  3.33s/it]

Episode 10260 - Step 2007610 - Epsilon 0.605377794084918 - Mean Reward 788.44 - Mean Length 206.07 - Mean Loss 1.298 - Mean Q Value 36.987 - Time Delta 60.135 - Time 2023-11-03T18:23:30


 26%|██▌       | 10280/40000 [8:04:40<45:08:12,  5.47s/it]

Episode 10280 - Step 2014007 - Epsilon 0.6044104172715377 - Mean Reward 852.5 - Mean Length 234.85 - Mean Loss 1.321 - Mean Q Value 36.962 - Time Delta 91.934 - Time 2023-11-03T18:25:02


 26%|██▌       | 10300/40000 [8:05:29<16:45:18,  2.03s/it]

Episode 10300 - Step 2017716 - Epsilon 0.603850237395557 - Mean Reward 876.78 - Mean Length 238.23 - Mean Loss 1.309 - Mean Q Value 36.903 - Time Delta 53.176 - Time 2023-11-03T18:25:55


 26%|██▌       | 10301/40000 [8:05:37<29:41:54,  3.60s/it]

MarioNet saved to checkpoints2/mario_net_4.chkpt at step 2017716


 26%|██▌       | 10320/40000 [8:06:26<24:33:17,  2.98s/it]

Episode 10320 - Step 2021330 - Epsilon 0.603304905028821 - Mean Reward 840.43 - Mean Length 237.43 - Mean Loss 1.311 - Mean Q Value 36.978 - Time Delta 51.811 - Time 2023-11-03T18:26:47


 26%|██▌       | 10340/40000 [8:07:53<21:13:55,  2.58s/it]

Episode 10340 - Step 2027277 - Epsilon 0.6024086077991594 - Mean Reward 809.49 - Mean Length 237.92 - Mean Loss 1.33 - Mean Q Value 36.918 - Time Delta 85.375 - Time 2023-11-03T18:28:12


 26%|██▌       | 10360/40000 [8:08:45<30:33:46,  3.71s/it]

Episode 10360 - Step 2030986 - Epsilon 0.6018502832409354 - Mean Reward 798.72 - Mean Length 233.76 - Mean Loss 1.363 - Mean Q Value 36.811 - Time Delta 53.872 - Time 2023-11-03T18:29:06


 26%|██▌       | 10380/40000 [8:09:55<32:40:44,  3.97s/it]

Episode 10380 - Step 2035736 - Epsilon 0.6011360101238049 - Mean Reward 766.53 - Mean Length 217.29 - Mean Loss 1.363 - Mean Q Value 36.614 - Time Delta 68.516 - Time 2023-11-03T18:30:15


 26%|██▌       | 10400/40000 [8:10:40<20:30:04,  2.49s/it]

Episode 10400 - Step 2039055 - Epsilon 0.6006374243363968 - Mean Reward 744.72 - Mean Length 213.39 - Mean Loss 1.369 - Mean Q Value 36.449 - Time Delta 48.395 - Time 2023-11-03T18:31:03


 26%|██▌       | 10401/40000 [8:10:45<25:12:20,  3.07s/it]

MarioNet saved to checkpoints2/mario_net_4.chkpt at step 2039055


 26%|██▌       | 10420/40000 [8:11:28<21:08:20,  2.57s/it]

Episode 10420 - Step 2042323 - Epsilon 0.6001469039042148 - Mean Reward 765.8 - Mean Length 209.93 - Mean Loss 1.408 - Mean Q Value 36.244 - Time Delta 47.246 - Time 2023-11-03T18:31:50


 26%|██▌       | 10440/40000 [8:12:26<24:05:37,  2.93s/it]

Episode 10440 - Step 2046407 - Epsilon 0.599534466541165 - Mean Reward 776.98 - Mean Length 191.3 - Mean Loss 1.43 - Mean Q Value 36.167 - Time Delta 59.082 - Time 2023-11-03T18:32:49


 26%|██▌       | 10460/40000 [8:13:25<35:10:31,  4.29s/it]

Episode 10460 - Step 2050315 - Epsilon 0.5989490073374817 - Mean Reward 789.36 - Mean Length 193.29 - Mean Loss 1.436 - Mean Q Value 36.299 - Time Delta 57.363 - Time 2023-11-03T18:33:47


 26%|██▌       | 10480/40000 [8:14:25<25:01:08,  3.05s/it]

Episode 10480 - Step 2054258 - Epsilon 0.5983588841843795 - Mean Reward 786.05 - Mean Length 185.22 - Mean Loss 1.477 - Mean Q Value 36.517 - Time Delta 57.65 - Time 2023-11-03T18:34:44


 26%|██▋       | 10500/40000 [8:15:05<19:16:34,  2.35s/it]

Episode 10500 - Step 2057395 - Epsilon 0.597889805132425 - Mean Reward 773.61 - Mean Length 183.4 - Mean Loss 1.541 - Mean Q Value 36.84 - Time Delta 45.281 - Time 2023-11-03T18:35:30


 26%|██▋       | 10501/40000 [8:15:11<28:35:26,  3.49s/it]

MarioNet saved to checkpoints2/mario_net_4.chkpt at step 2057395


 26%|██▋       | 10520/40000 [8:16:07<27:17:05,  3.33s/it]

Episode 10520 - Step 2061664 - Epsilon 0.5972520475420166 - Mean Reward 816.83 - Mean Length 193.41 - Mean Loss 1.521 - Mean Q Value 37.32 - Time Delta 61.787 - Time 2023-11-03T18:36:31


 26%|██▋       | 10540/40000 [8:17:23<42:47:31,  5.23s/it]

Episode 10540 - Step 2066698 - Epsilon 0.5965008785184296 - Mean Reward 809.32 - Mean Length 202.91 - Mean Loss 1.497 - Mean Q Value 37.838 - Time Delta 73.524 - Time 2023-11-03T18:37:45


 26%|██▋       | 10560/40000 [8:18:11<13:25:52,  1.64s/it]

Episode 10560 - Step 2069852 - Epsilon 0.5960307229000941 - Mean Reward 784.29 - Mean Length 195.37 - Mean Loss 1.47 - Mean Q Value 38.283 - Time Delta 45.893 - Time 2023-11-03T18:38:31


 26%|██▋       | 10580/40000 [8:18:57<21:25:39,  2.62s/it]

Episode 10580 - Step 2074357 - Epsilon 0.5953598210869705 - Mean Reward 761.23 - Mean Length 200.99 - Mean Loss 1.445 - Mean Q Value 38.821 - Time Delta 65.094 - Time 2023-11-03T18:39:36


 26%|██▋       | 10600/40000 [8:20:00<12:58:16,  1.59s/it]

Episode 10600 - Step 2077433 - Epsilon 0.594902165318479 - Mean Reward 768.12 - Mean Length 200.38 - Mean Loss 1.398 - Mean Q Value 39.188 - Time Delta 44.965 - Time 2023-11-03T18:40:21


 27%|██▋       | 10601/40000 [8:20:03<14:07:33,  1.73s/it]

MarioNet saved to checkpoints2/mario_net_4.chkpt at step 2077433


 27%|██▋       | 10620/40000 [8:20:55<25:13:03,  3.09s/it]

Episode 10620 - Step 2081270 - Epsilon 0.5943317789603125 - Mean Reward 737.91 - Mean Length 196.06 - Mean Loss 1.391 - Mean Q Value 39.44 - Time Delta 55.954 - Time 2023-11-03T18:41:17


 27%|██▋       | 10640/40000 [8:21:26<13:19:50,  1.63s/it]

Episode 10640 - Step 2083188 - Epsilon 0.5940468651501933 - Mean Reward 670.56 - Mean Length 164.9 - Mean Loss 1.396 - Mean Q Value 39.66 - Time Delta 28.022 - Time 2023-11-03T18:41:45


 27%|██▋       | 10660/40000 [8:22:50<27:22:34,  3.36s/it]

Episode 10660 - Step 2089043 - Epsilon 0.5931779650240093 - Mean Reward 701.71 - Mean Length 191.91 - Mean Loss 1.393 - Mean Q Value 39.721 - Time Delta 84.614 - Time 2023-11-03T18:43:09


 27%|██▋       | 10680/40000 [8:24:06<21:07:15,  2.59s/it]

Episode 10680 - Step 2094282 - Epsilon 0.5924015586472774 - Mean Reward 702.7 - Mean Length 199.25 - Mean Loss 1.406 - Mean Q Value 39.607 - Time Delta 76.189 - Time 2023-11-03T18:44:26


 27%|██▋       | 10700/40000 [8:25:01<26:19:41,  3.23s/it]

Episode 10700 - Step 2098064 - Epsilon 0.5918417076148003 - Mean Reward 737.28 - Mean Length 206.31 - Mean Loss 1.43 - Mean Q Value 39.621 - Time Delta 54.432 - Time 2023-11-03T18:45:20


 27%|██▋       | 10701/40000 [8:25:02<20:57:58,  2.58s/it]

MarioNet saved to checkpoints2/mario_net_4.chkpt at step 2098064


 27%|██▋       | 10720/40000 [8:26:16<66:02:37,  8.12s/it]

Episode 10720 - Step 2103494 - Epsilon 0.5910388274744345 - Mean Reward 721.01 - Mean Length 222.24 - Mean Loss 1.442 - Mean Q Value 39.591 - Time Delta 78.552 - Time 2023-11-03T18:46:39


 27%|██▋       | 10740/40000 [8:27:08<29:55:22,  3.68s/it]

Episode 10740 - Step 2106961 - Epsilon 0.5905267664531428 - Mean Reward 760.8 - Mean Length 237.73 - Mean Loss 1.431 - Mean Q Value 39.384 - Time Delta 50.936 - Time 2023-11-03T18:47:30


 27%|██▋       | 10760/40000 [8:28:00<20:44:17,  2.55s/it]

Episode 10760 - Step 2110476 - Epsilon 0.5900080689283166 - Mean Reward 735.11 - Mean Length 214.33 - Mean Loss 1.446 - Mean Q Value 39.355 - Time Delta 51.046 - Time 2023-11-03T18:48:21


 27%|██▋       | 10780/40000 [8:28:48<25:44:41,  3.17s/it]

Episode 10780 - Step 2113768 - Epsilon 0.5895226919868604 - Mean Reward 718.6 - Mean Length 194.86 - Mean Loss 1.457 - Mean Q Value 39.316 - Time Delta 48.061 - Time 2023-11-03T18:49:09


 27%|██▋       | 10800/40000 [8:29:53<21:43:47,  2.68s/it]

Episode 10800 - Step 2118242 - Epsilon 0.5888636793949013 - Mean Reward 701.69 - Mean Length 201.78 - Mean Loss 1.451 - Mean Q Value 39.35 - Time Delta 65.264 - Time 2023-11-03T18:50:14


 27%|██▋       | 10801/40000 [8:29:56<22:43:08,  2.80s/it]

MarioNet saved to checkpoints2/mario_net_4.chkpt at step 2118242


 27%|██▋       | 10820/40000 [8:30:50<22:07:44,  2.73s/it]

Episode 10820 - Step 2122319 - Epsilon 0.5882637857873215 - Mean Reward 733.13 - Mean Length 188.25 - Mean Loss 1.454 - Mean Q Value 39.3 - Time Delta 59.55 - Time 2023-11-03T18:51:13


 27%|██▋       | 10840/40000 [8:31:38<15:00:50,  1.85s/it]

Episode 10840 - Step 2125322 - Epsilon 0.5878223124336888 - Mean Reward 727.29 - Mean Length 183.61 - Mean Loss 1.472 - Mean Q Value 39.489 - Time Delta 43.906 - Time 2023-11-03T18:51:57


 27%|██▋       | 10860/40000 [8:32:29<19:03:23,  2.35s/it]

Episode 10860 - Step 2129009 - Epsilon 0.5872807367864518 - Mean Reward 743.05 - Mean Length 185.33 - Mean Loss 1.497 - Mean Q Value 39.741 - Time Delta 53.13 - Time 2023-11-03T18:52:50


 27%|██▋       | 10880/40000 [8:33:12<14:17:52,  1.77s/it]

Episode 10880 - Step 2131869 - Epsilon 0.5868609810876639 - Mean Reward 747.48 - Mean Length 181.01 - Mean Loss 1.534 - Mean Q Value 40.057 - Time Delta 41.579 - Time 2023-11-03T18:53:32


 27%|██▋       | 10900/40000 [8:33:51<12:43:55,  1.58s/it]

Episode 10900 - Step 2134984 - Epsilon 0.5864041409466915 - Mean Reward 723.71 - Mean Length 167.42 - Mean Loss 1.564 - Mean Q Value 40.374 - Time Delta 44.974 - Time 2023-11-03T18:54:17


 27%|██▋       | 10901/40000 [8:33:59<27:18:35,  3.38s/it]

MarioNet saved to checkpoints2/mario_net_4.chkpt at step 2134984


 27%|██▋       | 10920/40000 [8:34:52<24:19:05,  3.01s/it]

Episode 10920 - Step 2138954 - Epsilon 0.5858224234890428 - Mean Reward 710.03 - Mean Length 166.35 - Mean Loss 1.586 - Mean Q Value 40.613 - Time Delta 57.936 - Time 2023-11-03T18:55:15


 27%|██▋       | 10940/40000 [8:35:40<16:10:00,  2.00s/it]

Episode 10940 - Step 2141991 - Epsilon 0.5853778065674268 - Mean Reward 723.03 - Mean Length 166.69 - Mean Loss 1.62 - Mean Q Value 40.775 - Time Delta 44.074 - Time 2023-11-03T18:55:59


 27%|██▋       | 10960/40000 [8:36:30<20:54:33,  2.59s/it]

Episode 10960 - Step 2145592 - Epsilon 0.5848510572697492 - Mean Reward 723.71 - Mean Length 165.83 - Mean Loss 1.621 - Mean Q Value 40.849 - Time Delta 52.479 - Time 2023-11-03T18:56:52


 27%|██▋       | 10980/40000 [8:37:13<20:29:08,  2.54s/it]

Episode 10980 - Step 2148536 - Epsilon 0.584420765204656 - Mean Reward 723.2 - Mean Length 166.67 - Mean Loss 1.606 - Mean Q Value 40.932 - Time Delta 42.594 - Time 2023-11-03T18:57:34


 28%|██▊       | 11000/40000 [8:38:10<29:24:02,  3.65s/it]

Episode 11000 - Step 2152373 - Epsilon 0.5838604283100637 - Mean Reward 731.71 - Mean Length 173.89 - Mean Loss 1.598 - Mean Q Value 40.954 - Time Delta 55.408 - Time 2023-11-03T18:58:30


 28%|██▊       | 11001/40000 [8:38:11<23:10:48,  2.88s/it]

MarioNet saved to checkpoints2/mario_net_4.chkpt at step 2152373


 28%|██▊       | 11020/40000 [8:38:57<13:14:30,  1.64s/it]

Episode 11020 - Step 2156055 - Epsilon 0.5833232320011135 - Mean Reward 734.45 - Mean Length 171.01 - Mean Loss 1.593 - Mean Q Value 41.06 - Time Delta 52.98 - Time 2023-11-03T18:59:23


 28%|██▊       | 11040/40000 [8:40:14<18:30:55,  2.30s/it]

Episode 11040 - Step 2161220 - Epsilon 0.582570501869577 - Mean Reward 756.93 - Mean Length 192.29 - Mean Loss 1.598 - Mean Q Value 41.084 - Time Delta 74.072 - Time 2023-11-03T19:00:37


 28%|██▊       | 11060/40000 [8:41:03<22:55:05,  2.85s/it]

Episode 11060 - Step 2164367 - Epsilon 0.582112344721098 - Mean Reward 749.17 - Mean Length 187.75 - Mean Loss 1.62 - Mean Q Value 41.041 - Time Delta 45.3 - Time 2023-11-03T19:01:22


 28%|██▊       | 11080/40000 [8:42:16<23:33:53,  2.93s/it]

Episode 11080 - Step 2169807 - Epsilon 0.5813212099268737 - Mean Reward 766.41 - Mean Length 212.71 - Mean Loss 1.602 - Mean Q Value 40.982 - Time Delta 76.805 - Time 2023-11-03T19:02:39


 28%|██▊       | 11100/40000 [8:43:03<13:08:20,  1.64s/it]

Episode 11100 - Step 2172865 - Epsilon 0.5808769596425996 - Mean Reward 767.3 - Mean Length 204.92 - Mean Loss 1.623 - Mean Q Value 40.907 - Time Delta 43.514 - Time 2023-11-03T19:03:22


 28%|██▊       | 11101/40000 [8:43:04<11:55:02,  1.48s/it]

MarioNet saved to checkpoints2/mario_net_4.chkpt at step 2172865


 28%|██▊       | 11120/40000 [8:43:41<11:43:49,  1.46s/it]

Episode 11120 - Step 2175750 - Epsilon 0.5804581531332947 - Mean Reward 725.35 - Mean Length 196.95 - Mean Loss 1.642 - Mean Q Value 40.854 - Time Delta 41.011 - Time 2023-11-03T19:04:03


 28%|██▊       | 11140/40000 [8:45:04<53:48:19,  6.71s/it]

Episode 11140 - Step 2181660 - Epsilon 0.5796011593648456 - Mean Reward 768.15 - Mean Length 204.4 - Mean Loss 1.631 - Mean Q Value 40.877 - Time Delta 85.219 - Time 2023-11-03T19:05:28


 28%|██▊       | 11160/40000 [8:45:52<22:02:53,  2.75s/it]

Episode 11160 - Step 2184721 - Epsilon 0.5791577891883087 - Mean Reward 760.94 - Mean Length 203.54 - Mean Loss 1.628 - Mean Q Value 40.851 - Time Delta 43.978 - Time 2023-11-03T19:06:12


 28%|██▊       | 11180/40000 [8:46:41<24:22:36,  3.04s/it]

Episode 11180 - Step 2188295 - Epsilon 0.5786405427531947 - Mean Reward 764.54 - Mean Length 184.88 - Mean Loss 1.652 - Mean Q Value 40.88 - Time Delta 51.011 - Time 2023-11-03T19:07:03


 28%|██▊       | 11200/40000 [8:47:34<16:04:30,  2.01s/it]

Episode 11200 - Step 2192047 - Epsilon 0.5780980323333671 - Mean Reward 771.11 - Mean Length 191.82 - Mean Loss 1.665 - Mean Q Value 40.899 - Time Delta 54.039 - Time 2023-11-03T19:07:57


 28%|██▊       | 11201/40000 [8:47:39<24:43:09,  3.09s/it]

MarioNet saved to checkpoints2/mario_net_4.chkpt at step 2192047


 28%|██▊       | 11220/40000 [8:48:19<16:38:22,  2.08s/it]

Episode 11220 - Step 2195038 - Epsilon 0.5776659210513538 - Mean Reward 785.83 - Mean Length 192.88 - Mean Loss 1.661 - Mean Q Value 41.037 - Time Delta 43.428 - Time 2023-11-03T19:08:41


 28%|██▊       | 11240/40000 [8:49:26<20:43:05,  2.59s/it]

Episode 11240 - Step 2199635 - Epsilon 0.5770024197460184 - Mean Reward 755.8 - Mean Length 179.75 - Mean Loss 1.679 - Mean Q Value 41.129 - Time Delta 66.149 - Time 2023-11-03T19:09:47


 28%|██▊       | 11260/40000 [8:50:34<24:22:53,  3.05s/it]

Episode 11260 - Step 2204536 - Epsilon 0.5762958803748821 - Mean Reward 791.69 - Mean Length 198.15 - Mean Loss 1.693 - Mean Q Value 41.307 - Time Delta 70.623 - Time 2023-11-03T19:10:58


 28%|██▊       | 11280/40000 [8:51:15<25:01:15,  3.14s/it]

Episode 11280 - Step 2207055 - Epsilon 0.5759330722500126 - Mean Reward 757.98 - Mean Length 187.6 - Mean Loss 1.672 - Mean Q Value 41.563 - Time Delta 36.499 - Time 2023-11-03T19:11:34


 28%|██▊       | 11300/40000 [8:52:08<32:05:30,  4.03s/it]

Episode 11300 - Step 2210875 - Epsilon 0.5753833186463482 - Mean Reward 769.94 - Mean Length 188.28 - Mean Loss 1.643 - Mean Q Value 41.821 - Time Delta 54.77 - Time 2023-11-03T19:12:29


 28%|██▊       | 11301/40000 [8:52:11<27:45:30,  3.48s/it]

MarioNet saved to checkpoints2/mario_net_4.chkpt at step 2210875


 28%|██▊       | 11320/40000 [8:52:58<25:37:36,  3.22s/it]

Episode 11320 - Step 2214480 - Epsilon 0.5748649879733999 - Mean Reward 773.17 - Mean Length 194.42 - Mean Loss 1.651 - Mean Q Value 41.879 - Time Delta 51.645 - Time 2023-11-03T19:13:21


 28%|██▊       | 11340/40000 [8:53:42<14:58:16,  1.88s/it]

Episode 11340 - Step 2217341 - Epsilon 0.5744539627497337 - Mean Reward 722.0 - Mean Length 177.06 - Mean Loss 1.651 - Mean Q Value 42.008 - Time Delta 41.219 - Time 2023-11-03T19:14:02


 28%|██▊       | 11360/40000 [8:54:40<20:16:13,  2.55s/it]

Episode 11360 - Step 2221663 - Epsilon 0.5738336003764782 - Mean Reward 719.12 - Mean Length 171.27 - Mean Loss 1.647 - Mean Q Value 42.174 - Time Delta 63.463 - Time 2023-11-03T19:15:05


 28%|██▊       | 11380/40000 [8:55:35<23:52:30,  3.00s/it]

Episode 11380 - Step 2224955 - Epsilon 0.573361529548085 - Mean Reward 714.29 - Mean Length 179.0 - Mean Loss 1.643 - Mean Q Value 42.201 - Time Delta 49.258 - Time 2023-11-03T19:15:55


 28%|██▊       | 11400/40000 [8:56:19<16:02:26,  2.02s/it]

Episode 11400 - Step 2228354 - Epsilon 0.5728745224738439 - Mean Reward 708.87 - Mean Length 174.79 - Mean Loss 1.637 - Mean Q Value 42.224 - Time Delta 50.449 - Time 2023-11-03T19:16:45


 29%|██▊       | 11401/40000 [8:56:27<29:41:11,  3.74s/it]

MarioNet saved to checkpoints2/mario_net_4.chkpt at step 2228354


 29%|██▊       | 11420/40000 [8:57:27<17:02:13,  2.15s/it]

Episode 11420 - Step 2232743 - Epsilon 0.5722462805576944 - Mean Reward 722.73 - Mean Length 182.63 - Mean Loss 1.64 - Mean Q Value 42.296 - Time Delta 65.225 - Time 2023-11-03T19:17:50


 29%|██▊       | 11440/40000 [8:58:21<26:16:20,  3.31s/it]

Episode 11440 - Step 2236217 - Epsilon 0.5717495003588872 - Mean Reward 730.07 - Mean Length 188.76 - Mean Loss 1.639 - Mean Q Value 42.217 - Time Delta 51.653 - Time 2023-11-03T19:18:42


 29%|██▊       | 11460/40000 [8:59:05<14:35:28,  1.84s/it]

Episode 11460 - Step 2239141 - Epsilon 0.5713317041449633 - Mean Reward 695.35 - Mean Length 174.78 - Mean Loss 1.643 - Mean Q Value 42.129 - Time Delta 43.326 - Time 2023-11-03T19:19:25


 29%|██▊       | 11480/40000 [8:59:45<13:20:57,  1.69s/it]

Episode 11480 - Step 2242015 - Epsilon 0.5709213497016696 - Mean Reward 705.7 - Mean Length 170.6 - Mean Loss 1.694 - Mean Q Value 42.066 - Time Delta 42.524 - Time 2023-11-03T19:20:08


 29%|██▉       | 11500/40000 [9:00:37<13:41:42,  1.73s/it]

Episode 11500 - Step 2245382 - Epsilon 0.5704409787999126 - Mean Reward 697.5 - Mean Length 170.28 - Mean Loss 1.747 - Mean Q Value 41.993 - Time Delta 49.426 - Time 2023-11-03T19:20:57


 29%|██▉       | 11501/40000 [9:00:39<14:28:20,  1.83s/it]

MarioNet saved to checkpoints2/mario_net_4.chkpt at step 2245382


 29%|██▉       | 11520/40000 [9:01:33<35:15:08,  4.46s/it]

Episode 11520 - Step 2249127 - Epsilon 0.5699071533027658 - Mean Reward 689.84 - Mean Length 163.84 - Mean Loss 1.774 - Mean Q Value 41.854 - Time Delta 55.691 - Time 2023-11-03T19:21:53


 29%|██▉       | 11540/40000 [9:02:54<27:18:56,  3.46s/it]

Episode 11540 - Step 2254934 - Epsilon 0.5690803907609643 - Mean Reward 740.67 - Mean Length 187.17 - Mean Loss 1.827 - Mean Q Value 41.84 - Time Delta 84.837 - Time 2023-11-03T19:23:18


 29%|██▉       | 11560/40000 [9:03:58<12:40:45,  1.60s/it]

Episode 11560 - Step 2259313 - Epsilon 0.5684577308159313 - Mean Reward 777.07 - Mean Length 201.72 - Mean Loss 1.881 - Mean Q Value 41.716 - Time Delta 64.184 - Time 2023-11-03T19:24:22


 29%|██▉       | 11580/40000 [9:04:57<19:50:48,  2.51s/it]

Episode 11580 - Step 2263503 - Epsilon 0.5678625830314984 - Mean Reward 798.65 - Mean Length 214.88 - Mean Loss 1.929 - Mean Q Value 41.618 - Time Delta 61.261 - Time 2023-11-03T19:25:23


 29%|██▉       | 11600/40000 [9:05:52<23:34:34,  2.99s/it]

Episode 11600 - Step 2266928 - Epsilon 0.567376558742798 - Mean Reward 797.91 - Mean Length 215.46 - Mean Loss 1.973 - Mean Q Value 41.531 - Time Delta 50.026 - Time 2023-11-03T19:26:13


 29%|██▉       | 11601/40000 [9:05:55<22:41:37,  2.88s/it]

MarioNet saved to checkpoints2/mario_net_4.chkpt at step 2266928


 29%|██▉       | 11620/40000 [9:06:42<16:10:01,  2.05s/it]

Episode 11620 - Step 2270259 - Epsilon 0.5669042725298689 - Mean Reward 785.23 - Mean Length 211.32 - Mean Loss 2.012 - Mean Q Value 41.575 - Time Delta 48.282 - Time 2023-11-03T19:27:01


 29%|██▉       | 11640/40000 [9:07:28<16:57:38,  2.15s/it]

Episode 11640 - Step 2273657 - Epsilon 0.5664228917855328 - Mean Reward 739.37 - Mean Length 187.23 - Mean Loss 2.028 - Mean Q Value 41.591 - Time Delta 49.318 - Time 2023-11-03T19:27:51


 29%|██▉       | 11660/40000 [9:08:27<34:15:54,  4.35s/it]

Episode 11660 - Step 2277825 - Epsilon 0.5658329864525247 - Mean Reward 739.31 - Mean Length 185.12 - Mean Loss 2.031 - Mean Q Value 41.703 - Time Delta 59.654 - Time 2023-11-03T19:28:50


 29%|██▉       | 11680/40000 [9:09:22<16:32:03,  2.10s/it]

Episode 11680 - Step 2281597 - Epsilon 0.565299657383737 - Mean Reward 715.91 - Mean Length 180.94 - Mean Loss 2.033 - Mean Q Value 41.675 - Time Delta 54.159 - Time 2023-11-03T19:29:45


 29%|██▉       | 11700/40000 [9:09:58<10:50:12,  1.38s/it]

Episode 11700 - Step 2283989 - Epsilon 0.5649617092029087 - Mean Reward 690.57 - Mean Length 170.61 - Mean Loss 2.041 - Mean Q Value 41.708 - Time Delta 35.121 - Time 2023-11-03T19:30:20


 29%|██▉       | 11701/40000 [9:10:02<15:18:19,  1.95s/it]

MarioNet saved to checkpoints2/mario_net_4.chkpt at step 2283989


 29%|██▉       | 11720/40000 [9:11:04<23:32:26,  3.00s/it]

Episode 11720 - Step 2288536 - Epsilon 0.5643198537833395 - Mean Reward 748.86 - Mean Length 182.77 - Mean Loss 2.022 - Mean Q Value 41.714 - Time Delta 65.434 - Time 2023-11-03T19:31:25


 29%|██▉       | 11740/40000 [9:12:04<21:17:42,  2.71s/it]

Episode 11740 - Step 2292549 - Epsilon 0.5637539837212519 - Mean Reward 766.99 - Mean Length 188.92 - Mean Loss 2.032 - Mean Q Value 41.706 - Time Delta 57.818 - Time 2023-11-03T19:32:23


 29%|██▉       | 11760/40000 [9:13:00<28:33:20,  3.64s/it]

Episode 11760 - Step 2296486 - Epsilon 0.5631993817716292 - Mean Reward 734.27 - Mean Length 186.61 - Mean Loss 2.03 - Mean Q Value 41.64 - Time Delta 56.653 - Time 2023-11-03T19:33:20


 29%|██▉       | 11780/40000 [9:13:51<16:32:25,  2.11s/it]

Episode 11780 - Step 2300173 - Epsilon 0.5626804918566779 - Mean Reward 759.5 - Mean Length 185.76 - Mean Loss 2.021 - Mean Q Value 41.674 - Time Delta 52.985 - Time 2023-11-03T19:34:13


 30%|██▉       | 11800/40000 [9:14:44<20:55:44,  2.67s/it]

Episode 11800 - Step 2303835 - Epsilon 0.5621655935326212 - Mean Reward 798.54 - Mean Length 198.46 - Mean Loss 1.979 - Mean Q Value 41.666 - Time Delta 53.154 - Time 2023-11-03T19:35:06


 30%|██▉       | 11801/40000 [9:14:47<22:08:53,  2.83s/it]

MarioNet saved to checkpoints2/mario_net_4.chkpt at step 2303835


 30%|██▉       | 11820/40000 [9:15:44<27:18:55,  3.49s/it]

Episode 11820 - Step 2307857 - Epsilon 0.5616006200456454 - Mean Reward 765.72 - Mean Length 193.21 - Mean Loss 1.956 - Mean Q Value 41.57 - Time Delta 57.956 - Time 2023-11-03T19:36:04


 30%|██▉       | 11840/40000 [9:16:33<21:21:19,  2.73s/it]

Episode 11840 - Step 2311289 - Epsilon 0.5611189733093657 - Mean Reward 759.72 - Mean Length 187.4 - Mean Loss 1.885 - Mean Q Value 41.574 - Time Delta 48.755 - Time 2023-11-03T19:36:52


 30%|██▉       | 11860/40000 [9:17:26<20:49:47,  2.66s/it]

Episode 11860 - Step 2315159 - Epsilon 0.5605763531696042 - Mean Reward 757.8 - Mean Length 186.73 - Mean Loss 1.859 - Mean Q Value 41.556 - Time Delta 55.024 - Time 2023-11-03T19:37:47


 30%|██▉       | 11880/40000 [9:18:19<21:13:00,  2.72s/it]

Episode 11880 - Step 2318796 - Epsilon 0.5600668807107478 - Mean Reward 750.6 - Mean Length 186.23 - Mean Loss 1.783 - Mean Q Value 41.59 - Time Delta 52.191 - Time 2023-11-03T19:38:40


 30%|██▉       | 11900/40000 [9:19:23<21:22:23,  2.74s/it]

Episode 11900 - Step 2323135 - Epsilon 0.5594596774773061 - Mean Reward 734.09 - Mean Length 193.0 - Mean Loss 1.787 - Mean Q Value 41.45 - Time Delta 62.713 - Time 2023-11-03T19:39:42


 30%|██▉       | 11901/40000 [9:19:24<17:24:54,  2.23s/it]

MarioNet saved to checkpoints2/mario_net_4.chkpt at step 2323135


 30%|██▉       | 11920/40000 [9:20:07<23:22:33,  3.00s/it]

Episode 11920 - Step 2326166 - Epsilon 0.5590359074298211 - Mean Reward 699.43 - Mean Length 183.09 - Mean Loss 1.791 - Mean Q Value 41.348 - Time Delta 43.934 - Time 2023-11-03T19:40:26


 30%|██▉       | 11940/40000 [9:20:57<19:09:17,  2.46s/it]

Episode 11940 - Step 2329650 - Epsilon 0.558549199085796 - Mean Reward 682.3 - Mean Length 183.61 - Mean Loss 1.805 - Mean Q Value 41.144 - Time Delta 50.613 - Time 2023-11-03T19:41:17


 30%|██▉       | 11960/40000 [9:21:47<19:43:53,  2.53s/it]

Episode 11960 - Step 2333133 - Epsilon 0.5580630539959904 - Mean Reward 683.49 - Mean Length 179.74 - Mean Loss 1.798 - Mean Q Value 41.119 - Time Delta 50.021 - Time 2023-11-03T19:42:07


 30%|██▉       | 11980/40000 [9:22:38<22:00:36,  2.83s/it]

Episode 11980 - Step 2336699 - Epsilon 0.5575657624217778 - Mean Reward 696.33 - Mean Length 179.03 - Mean Loss 1.816 - Mean Q Value 41.088 - Time Delta 51.632 - Time 2023-11-03T19:42:59


 30%|███       | 12000/40000 [9:23:40<25:05:01,  3.23s/it]

Episode 12000 - Step 2341096 - Epsilon 0.556953194924873 - Mean Reward 704.25 - Mean Length 179.61 - Mean Loss 1.796 - Mean Q Value 41.165 - Time Delta 62.859 - Time 2023-11-03T19:44:01


 30%|███       | 12001/40000 [9:23:43<24:14:22,  3.12s/it]

MarioNet saved to checkpoints2/mario_net_4.chkpt at step 2341096


 30%|███       | 12020/40000 [9:24:46<20:09:19,  2.59s/it]

Episode 12020 - Step 2345703 - Epsilon 0.5563120932676766 - Mean Reward 758.28 - Mean Length 195.37 - Mean Loss 1.808 - Mean Q Value 41.168 - Time Delta 66.669 - Time 2023-11-03T19:45:08


 30%|███       | 12040/40000 [9:25:36<15:59:37,  2.06s/it]

Episode 12040 - Step 2349049 - Epsilon 0.5558469327239627 - Mean Reward 760.6 - Mean Length 193.99 - Mean Loss 1.825 - Mean Q Value 41.15 - Time Delta 47.54 - Time 2023-11-03T19:45:56


 30%|███       | 12060/40000 [9:26:19<17:22:41,  2.24s/it]

Episode 12060 - Step 2352148 - Epsilon 0.555416457036019 - Mean Reward 751.67 - Mean Length 190.15 - Mean Loss 1.853 - Mean Q Value 40.989 - Time Delta 44.642 - Time 2023-11-03T19:46:40


 30%|███       | 12080/40000 [9:27:31<30:14:16,  3.90s/it]

Episode 12080 - Step 2357397 - Epsilon 0.5546880897036609 - Mean Reward 743.0 - Mean Length 206.98 - Mean Loss 1.886 - Mean Q Value 40.857 - Time Delta 73.157 - Time 2023-11-03T19:47:53


 30%|███       | 12100/40000 [9:28:19<19:15:11,  2.48s/it]

Episode 12100 - Step 2360839 - Epsilon 0.5542109858461352 - Mean Reward 727.03 - Mean Length 197.43 - Mean Loss 1.94 - Mean Q Value 40.818 - Time Delta 48.19 - Time 2023-11-03T19:48:42


 30%|███       | 12101/40000 [9:28:23<23:08:20,  2.99s/it]

MarioNet saved to checkpoints2/mario_net_4.chkpt at step 2360839


 30%|███       | 12120/40000 [9:29:18<18:29:43,  2.39s/it]

Episode 12120 - Step 2364798 - Epsilon 0.5536627268187275 - Mean Reward 703.54 - Mean Length 190.95 - Mean Loss 1.949 - Mean Q Value 40.941 - Time Delta 55.371 - Time 2023-11-03T19:49:37


 30%|███       | 12140/40000 [9:30:07<17:04:30,  2.21s/it]

Episode 12140 - Step 2368482 - Epsilon 0.5531530381312145 - Mean Reward 723.76 - Mean Length 194.33 - Mean Loss 1.98 - Mean Q Value 41.053 - Time Delta 52.009 - Time 2023-11-03T19:50:29


 30%|███       | 12160/40000 [9:30:50<23:57:28,  3.10s/it]

Episode 12160 - Step 2371455 - Epsilon 0.5527420598329228 - Mean Reward 720.8 - Mean Length 193.07 - Mean Loss 1.977 - Mean Q Value 41.192 - Time Delta 42.761 - Time 2023-11-03T19:51:12


 30%|███       | 12180/40000 [9:31:41<17:50:16,  2.31s/it]

Episode 12180 - Step 2374880 - Epsilon 0.5522689769525038 - Mean Reward 719.95 - Mean Length 174.83 - Mean Loss 2.021 - Mean Q Value 41.293 - Time Delta 49.213 - Time 2023-11-03T19:52:01


 30%|███       | 12200/40000 [9:32:27<17:54:22,  2.32s/it]

Episode 12200 - Step 2378303 - Epsilon 0.5517965748742288 - Mean Reward 730.22 - Mean Length 174.64 - Mean Loss 2.001 - Mean Q Value 41.336 - Time Delta 48.949 - Time 2023-11-03T19:52:50


 31%|███       | 12201/40000 [9:32:32<24:23:55,  3.16s/it]

MarioNet saved to checkpoints2/mario_net_4.chkpt at step 2378303


 31%|███       | 12220/40000 [9:33:19<20:23:57,  2.64s/it]

Episode 12220 - Step 2381786 - Epsilon 0.551316307073496 - Mean Reward 740.57 - Mean Length 169.88 - Mean Loss 2.046 - Mean Q Value 41.471 - Time Delta 49.767 - Time 2023-11-03T19:53:40


 31%|███       | 12240/40000 [9:34:04<12:42:06,  1.65s/it]

Episode 12240 - Step 2385089 - Epsilon 0.5508612454853941 - Mean Reward 729.57 - Mean Length 166.07 - Mean Loss 2.098 - Mean Q Value 41.722 - Time Delta 47.209 - Time 2023-11-03T19:54:27


 31%|███       | 12260/40000 [9:34:44<17:00:55,  2.21s/it]

Episode 12260 - Step 2387654 - Epsilon 0.5505081189008473 - Mean Reward 717.47 - Mean Length 161.99 - Mean Loss 2.129 - Mean Q Value 41.944 - Time Delta 36.727 - Time 2023-11-03T19:55:04


 31%|███       | 12280/40000 [9:35:31<15:27:42,  2.01s/it]

Episode 12280 - Step 2391163 - Epsilon 0.5500253973578293 - Mean Reward 725.0 - Mean Length 162.83 - Mean Loss 2.157 - Mean Q Value 42.024 - Time Delta 50.263 - Time 2023-11-03T19:55:54


 31%|███       | 12300/40000 [9:36:21<17:38:26,  2.29s/it]

Episode 12300 - Step 2394899 - Epsilon 0.5495119134067716 - Mean Reward 742.91 - Mean Length 165.96 - Mean Loss 2.234 - Mean Q Value 42.217 - Time Delta 53.385 - Time 2023-11-03T19:56:47


 31%|███       | 12301/40000 [9:36:29<30:27:18,  3.96s/it]

MarioNet saved to checkpoints2/mario_net_4.chkpt at step 2394899


 31%|███       | 12320/40000 [9:37:20<14:45:13,  1.92s/it]

Episode 12320 - Step 2399032 - Epsilon 0.5489444233812516 - Mean Reward 746.41 - Mean Length 172.46 - Mean Loss 2.257 - Mean Q Value 42.31 - Time Delta 59.384 - Time 2023-11-03T19:57:47


 31%|███       | 12340/40000 [9:38:14<12:17:33,  1.60s/it]

Episode 12340 - Step 2402502 - Epsilon 0.5484684205304925 - Mean Reward 729.95 - Mean Length 174.13 - Mean Loss 2.24 - Mean Q Value 42.344 - Time Delta 50.26 - Time 2023-11-03T19:58:37


 31%|███       | 12360/40000 [9:39:07<15:12:21,  1.98s/it]

Episode 12360 - Step 2406093 - Epsilon 0.5479762538987252 - Mean Reward 773.73 - Mean Length 184.39 - Mean Loss 2.229 - Mean Q Value 42.364 - Time Delta 51.22 - Time 2023-11-03T19:59:28


 31%|███       | 12380/40000 [9:40:03<24:42:18,  3.22s/it]

Episode 12380 - Step 2410102 - Epsilon 0.5474273197601315 - Mean Reward 770.65 - Mean Length 189.39 - Mean Loss 2.173 - Mean Q Value 42.532 - Time Delta 56.639 - Time 2023-11-03T20:00:25


 31%|███       | 12400/40000 [9:41:09<23:12:14,  3.03s/it]

Episode 12400 - Step 2414570 - Epsilon 0.546816184749926 - Mean Reward 781.28 - Mean Length 196.71 - Mean Loss 2.151 - Mean Q Value 42.538 - Time Delta 63.429 - Time 2023-11-03T20:01:28


 31%|███       | 12401/40000 [9:41:10<18:36:52,  2.43s/it]

MarioNet saved to checkpoints2/mario_net_4.chkpt at step 2414570


 31%|███       | 12420/40000 [9:41:57<24:24:40,  3.19s/it]

Episode 12420 - Step 2417959 - Epsilon 0.5463530908852859 - Mean Reward 757.37 - Mean Length 189.27 - Mean Loss 2.117 - Mean Q Value 42.526 - Time Delta 49.303 - Time 2023-11-03T20:02:17


 31%|███       | 12440/40000 [9:42:44<13:54:30,  1.82s/it]

Episode 12440 - Step 2421313 - Epsilon 0.5458951657732063 - Mean Reward 772.56 - Mean Length 188.11 - Mean Loss 2.106 - Mean Q Value 42.486 - Time Delta 48.108 - Time 2023-11-03T20:03:06


 31%|███       | 12460/40000 [9:43:39<26:43:03,  3.49s/it]

Episode 12460 - Step 2425137 - Epsilon 0.545373539306628 - Mean Reward 761.08 - Mean Length 190.44 - Mean Loss 2.156 - Mean Q Value 42.544 - Time Delta 54.315 - Time 2023-11-03T20:04:00


 31%|███       | 12480/40000 [9:44:21<13:04:45,  1.71s/it]

Episode 12480 - Step 2427931 - Epsilon 0.5449927288553054 - Mean Reward 732.44 - Mean Length 178.29 - Mean Loss 2.187 - Mean Q Value 42.581 - Time Delta 40.159 - Time 2023-11-03T20:04:40


 31%|███▏      | 12500/40000 [9:45:04<23:53:05,  3.13s/it]

Episode 12500 - Step 2430858 - Epsilon 0.5445940762508892 - Mean Reward 700.92 - Mean Length 162.88 - Mean Loss 2.181 - Mean Q Value 42.68 - Time Delta 43.417 - Time 2023-11-03T20:05:23


 31%|███▏      | 12501/40000 [9:45:05<19:14:01,  2.52s/it]

MarioNet saved to checkpoints2/mario_net_4.chkpt at step 2430858


 31%|███▏      | 12520/40000 [9:46:13<32:44:13,  4.29s/it]

Episode 12520 - Step 2435595 - Epsilon 0.5439495223672272 - Mean Reward 740.17 - Mean Length 176.36 - Mean Loss 2.179 - Mean Q Value 42.706 - Time Delta 71.47 - Time 2023-11-03T20:06:35


 31%|███▏      | 12540/40000 [9:47:05<20:11:52,  2.65s/it]

Episode 12540 - Step 2438878 - Epsilon 0.5435032589013218 - Mean Reward 737.7 - Mean Length 175.65 - Mean Loss 2.17 - Mean Q Value 42.79 - Time Delta 50.915 - Time 2023-11-03T20:07:26


 31%|███▏      | 12560/40000 [9:48:13<16:16:57,  2.14s/it]

Episode 12560 - Step 2443371 - Epsilon 0.5428931115275776 - Mean Reward 747.72 - Mean Length 182.34 - Mean Loss 2.12 - Mean Q Value 42.779 - Time Delta 65.993 - Time 2023-11-03T20:08:32


 31%|███▏      | 12580/40000 [9:49:10<31:50:49,  4.18s/it]

Episode 12580 - Step 2447357 - Epsilon 0.54235238793455 - Mean Reward 763.76 - Mean Length 194.26 - Mean Loss 2.079 - Mean Q Value 42.745 - Time Delta 57.02 - Time 2023-11-03T20:09:29


 32%|███▏      | 12600/40000 [9:49:51<15:17:17,  2.01s/it]

Episode 12600 - Step 2450220 - Epsilon 0.5419643380542429 - Mean Reward 753.56 - Mean Length 193.62 - Mean Loss 2.041 - Mean Q Value 42.742 - Time Delta 41.882 - Time 2023-11-03T20:10:11


 32%|███▏      | 12601/40000 [9:49:52<13:01:33,  1.71s/it]

MarioNet saved to checkpoints2/mario_net_4.chkpt at step 2450220


 32%|███▏      | 12620/40000 [9:50:51<15:50:28,  2.08s/it]

Episode 12620 - Step 2454455 - Epsilon 0.5413908368903072 - Mean Reward 711.34 - Mean Length 188.6 - Mean Loss 2.017 - Mean Q Value 42.767 - Time Delta 61.259 - Time 2023-11-03T20:11:12


 32%|███▏      | 12640/40000 [9:51:49<17:25:50,  2.29s/it]

Episode 12640 - Step 2458388 - Epsilon 0.540858775900704 - Mean Reward 701.51 - Mean Length 195.1 - Mean Loss 1.972 - Mean Q Value 42.811 - Time Delta 58.325 - Time 2023-11-03T20:12:10


 32%|███▏      | 12660/40000 [9:52:44<15:43:21,  2.07s/it]

Episode 12660 - Step 2462029 - Epsilon 0.5403666831360568 - Mean Reward 679.0 - Mean Length 186.58 - Mean Loss 1.961 - Mean Q Value 42.879 - Time Delta 53.863 - Time 2023-11-03T20:13:04


 32%|███▏      | 12680/40000 [9:53:34<15:33:13,  2.05s/it]

Episode 12680 - Step 2465545 - Epsilon 0.5398919094557704 - Mean Reward 675.86 - Mean Length 181.88 - Mean Loss 2.005 - Mean Q Value 42.978 - Time Delta 50.986 - Time 2023-11-03T20:13:55


 32%|███▏      | 12700/40000 [9:54:21<23:49:15,  3.14s/it]

Episode 12700 - Step 2468620 - Epsilon 0.5394770269893376 - Mean Reward 695.56 - Mean Length 184.0 - Mean Loss 2.012 - Mean Q Value 43.083 - Time Delta 45.653 - Time 2023-11-03T20:14:41


 32%|███▏      | 12701/40000 [9:54:23<21:24:51,  2.82s/it]

MarioNet saved to checkpoints2/mario_net_4.chkpt at step 2468620


 32%|███▏      | 12720/40000 [9:55:03<18:28:21,  2.44s/it]

Episode 12720 - Step 2471375 - Epsilon 0.5391055900687909 - Mean Reward 687.06 - Mean Length 169.2 - Mean Loss 2.066 - Mean Q Value 43.141 - Time Delta 42.772 - Time 2023-11-03T20:15:24


 32%|███▏      | 12740/40000 [9:55:56<21:05:27,  2.79s/it]

Episode 12740 - Step 2474877 - Epsilon 0.5386338096176754 - Mean Reward 718.85 - Mean Length 164.89 - Mean Loss 2.11 - Mean Q Value 43.19 - Time Delta 52.713 - Time 2023-11-03T20:16:16


 32%|███▏      | 12760/40000 [9:56:54<28:45:01,  3.80s/it]

Episode 12760 - Step 2478710 - Epsilon 0.5381179109244804 - Mean Reward 753.88 - Mean Length 166.81 - Mean Loss 2.101 - Mean Q Value 43.261 - Time Delta 56.506 - Time 2023-11-03T20:17:13


 32%|███▏      | 12780/40000 [9:57:45<20:22:29,  2.69s/it]

Episode 12780 - Step 2482163 - Epsilon 0.5376535810249964 - Mean Reward 765.41 - Mean Length 166.18 - Mean Loss 2.092 - Mean Q Value 43.349 - Time Delta 51.738 - Time 2023-11-03T20:18:05


 32%|███▏      | 12800/40000 [9:58:32<15:35:07,  2.06s/it]

Episode 12800 - Step 2485687 - Epsilon 0.5371801167524972 - Mean Reward 767.71 - Mean Length 170.67 - Mean Loss 2.093 - Mean Q Value 43.448 - Time Delta 51.184 - Time 2023-11-03T20:18:56


 32%|███▏      | 12801/40000 [9:58:38<23:13:21,  3.07s/it]

MarioNet saved to checkpoints2/mario_net_4.chkpt at step 2485687


 32%|███▏      | 12820/40000 [9:59:24<14:15:16,  1.89s/it]

Episode 12820 - Step 2489234 - Epsilon 0.5367039833612267 - Mean Reward 800.35 - Mean Length 178.59 - Mean Loss 2.039 - Mean Q Value 43.55 - Time Delta 51.366 - Time 2023-11-03T20:19:47


 32%|███▏      | 12840/40000 [10:00:10<23:01:39,  3.05s/it]

Episode 12840 - Step 2492329 - Epsilon 0.5362888692201447 - Mean Reward 777.64 - Mean Length 174.52 - Mean Loss 2.066 - Mean Q Value 43.655 - Time Delta 45.848 - Time 2023-11-03T20:20:33


 32%|███▏      | 12860/40000 [10:01:19<25:16:39,  3.35s/it]

Episode 12860 - Step 2496882 - Epsilon 0.5356787856181083 - Mean Reward 779.18 - Mean Length 181.72 - Mean Loss 2.111 - Mean Q Value 43.689 - Time Delta 66.887 - Time 2023-11-03T20:21:40


 32%|███▏      | 12879/40000 [10:02:07<16:18:13,  2.16s/it]

MarioNet saved to checkpoints2/mario_net_5.chkpt at step 2500000


 32%|███▏      | 12880/40000 [10:02:10<18:28:20,  2.45s/it]

Episode 12880 - Step 2500341 - Epsilon 0.5352157575609121 - Mean Reward 798.53 - Mean Length 181.78 - Mean Loss 2.126 - Mean Q Value 43.642 - Time Delta 51.202 - Time 2023-11-03T20:22:31


 32%|███▏      | 12900/40000 [10:02:56<19:48:42,  2.63s/it]

Episode 12900 - Step 2503476 - Epsilon 0.5347964464974383 - Mean Reward 788.94 - Mean Length 177.89 - Mean Loss 2.151 - Mean Q Value 43.576 - Time Delta 45.932 - Time 2023-11-03T20:23:17


 32%|███▏      | 12901/40000 [10:02:59<20:12:32,  2.68s/it]

MarioNet saved to checkpoints2/mario_net_5.chkpt at step 2503476


 32%|███▏      | 12920/40000 [10:03:42<14:46:55,  1.97s/it]

Episode 12920 - Step 2506783 - Epsilon 0.5343544862005014 - Mean Reward 778.98 - Mean Length 175.49 - Mean Loss 2.192 - Mean Q Value 43.712 - Time Delta 48.333 - Time 2023-11-03T20:24:05


 32%|███▏      | 12940/40000 [10:04:38<16:15:23,  2.16s/it]

Episode 12940 - Step 2510624 - Epsilon 0.5338416185209506 - Mean Reward 766.91 - Mean Length 182.95 - Mean Loss 2.19 - Mean Q Value 43.709 - Time Delta 55.498 - Time 2023-11-03T20:25:01


 32%|███▏      | 12960/40000 [10:05:34<14:49:12,  1.97s/it]

Episode 12960 - Step 2514387 - Epsilon 0.533339643109217 - Mean Reward 753.89 - Mean Length 175.05 - Mean Loss 2.199 - Mean Q Value 43.853 - Time Delta 54.36 - Time 2023-11-03T20:25:55


 32%|███▏      | 12980/40000 [10:06:18<21:50:58,  2.91s/it]

Episode 12980 - Step 2517410 - Epsilon 0.5329367238957918 - Mean Reward 732.52 - Mean Length 170.69 - Mean Loss 2.201 - Mean Q Value 44.05 - Time Delta 43.932 - Time 2023-11-03T20:26:39


 32%|███▎      | 13000/40000 [10:07:03<14:24:11,  1.92s/it]

Episode 13000 - Step 2520475 - Epsilon 0.5325285174940746 - Mean Reward 724.35 - Mean Length 169.99 - Mean Loss 2.198 - Mean Q Value 44.205 - Time Delta 45.119 - Time 2023-11-03T20:27:24


 33%|███▎      | 13001/40000 [10:07:06<16:59:04,  2.26s/it]

MarioNet saved to checkpoints2/mario_net_5.chkpt at step 2520475


 33%|███▎      | 13020/40000 [10:07:55<19:32:08,  2.61s/it]

Episode 13020 - Step 2524244 - Epsilon 0.5320269787602471 - Mean Reward 704.49 - Mean Length 174.61 - Mean Loss 2.19 - Mean Q Value 44.166 - Time Delta 54.497 - Time 2023-11-03T20:28:19


 33%|███▎      | 13040/40000 [10:08:45<14:21:23,  1.92s/it]

Episode 13040 - Step 2527534 - Epsilon 0.5315895664259589 - Mean Reward 710.99 - Mean Length 169.1 - Mean Loss 2.19 - Mean Q Value 44.196 - Time Delta 47.214 - Time 2023-11-03T20:29:06


 33%|███▎      | 13060/40000 [10:09:39<18:49:13,  2.51s/it]

Episode 13060 - Step 2531166 - Epsilon 0.5311071021112099 - Mean Reward 704.81 - Mean Length 167.79 - Mean Loss 2.192 - Mean Q Value 44.145 - Time Delta 51.886 - Time 2023-11-03T20:29:58


 33%|███▎      | 13080/40000 [10:10:30<14:12:28,  1.90s/it]

Episode 13080 - Step 2534915 - Epsilon 0.5306095551168771 - Mean Reward 703.04 - Mean Length 175.05 - Mean Loss 2.206 - Mean Q Value 44.032 - Time Delta 53.29 - Time 2023-11-03T20:30:51


 33%|███▎      | 13100/40000 [10:11:24<22:11:11,  2.97s/it]

Episode 13100 - Step 2538601 - Epsilon 0.5301208235683208 - Mean Reward 718.99 - Mean Length 181.26 - Mean Loss 2.221 - Mean Q Value 43.965 - Time Delta 52.318 - Time 2023-11-03T20:31:43


 33%|███▎      | 13101/40000 [10:11:25<17:45:47,  2.38s/it]

MarioNet saved to checkpoints2/mario_net_5.chkpt at step 2538601


 33%|███▎      | 13120/40000 [10:12:17<15:36:32,  2.09s/it]

Episode 13120 - Step 2542531 - Epsilon 0.5296002355747478 - Mean Reward 742.87 - Mean Length 182.87 - Mean Loss 2.237 - Mean Q Value 43.884 - Time Delta 56.279 - Time 2023-11-03T20:32:40


 33%|███▎      | 13140/40000 [10:13:30<33:46:37,  4.53s/it]

Episode 13140 - Step 2547759 - Epsilon 0.5289085001278767 - Mean Reward 763.59 - Mean Length 202.25 - Mean Loss 2.245 - Mean Q Value 43.824 - Time Delta 74.495 - Time 2023-11-03T20:33:54


 33%|███▎      | 13160/40000 [10:14:22<11:57:09,  1.60s/it]

Episode 13160 - Step 2551068 - Epsilon 0.5284711414438419 - Mean Reward 753.81 - Mean Length 199.02 - Mean Loss 2.25 - Mean Q Value 43.718 - Time Delta 47.551 - Time 2023-11-03T20:34:42


 33%|███▎      | 13180/40000 [10:15:09<14:28:36,  1.94s/it]

Episode 13180 - Step 2554348 - Epsilon 0.5280379726771183 - Mean Reward 742.72 - Mean Length 194.33 - Mean Loss 2.255 - Mean Q Value 43.582 - Time Delta 48.019 - Time 2023-11-03T20:35:30


 33%|███▎      | 13200/40000 [10:16:10<19:51:27,  2.67s/it]

Episode 13200 - Step 2558471 - Epsilon 0.5274939778781939 - Mean Reward 765.02 - Mean Length 198.7 - Mean Loss 2.262 - Mean Q Value 43.532 - Time Delta 60.458 - Time 2023-11-03T20:36:30


 33%|███▎      | 13201/40000 [10:16:12<19:34:11,  2.63s/it]

MarioNet saved to checkpoints2/mario_net_5.chkpt at step 2558471


 33%|███▎      | 13220/40000 [10:17:06<24:04:17,  3.24s/it]

Episode 13220 - Step 2562315 - Epsilon 0.5269872995999536 - Mean Reward 756.92 - Mean Length 197.84 - Mean Loss 2.294 - Mean Q Value 43.528 - Time Delta 57.352 - Time 2023-11-03T20:37:28


 33%|███▎      | 13240/40000 [10:17:50<14:06:06,  1.90s/it]

Episode 13240 - Step 2565144 - Epsilon 0.5266147195548326 - Mean Reward 729.58 - Mean Length 173.85 - Mean Loss 2.304 - Mean Q Value 43.544 - Time Delta 42.44 - Time 2023-11-03T20:38:10


 33%|███▎      | 13260/40000 [10:18:44<24:49:20,  3.34s/it]

Episode 13260 - Step 2568881 - Epsilon 0.5261229594407608 - Mean Reward 732.27 - Mean Length 178.13 - Mean Loss 2.292 - Mean Q Value 43.648 - Time Delta 53.176 - Time 2023-11-03T20:39:03


 33%|███▎      | 13280/40000 [10:19:45<21:44:11,  2.93s/it]

Episode 13280 - Step 2573264 - Epsilon 0.5255467758700775 - Mean Reward 731.86 - Mean Length 189.16 - Mean Loss 2.319 - Mean Q Value 43.632 - Time Delta 62.44 - Time 2023-11-03T20:40:06


 33%|███▎      | 13300/40000 [10:20:55<18:28:22,  2.49s/it]

Episode 13300 - Step 2578545 - Epsilon 0.5248533804808154 - Mean Reward 731.89 - Mean Length 200.74 - Mean Loss 2.338 - Mean Q Value 43.492 - Time Delta 75.29 - Time 2023-11-03T20:41:21


 33%|███▎      | 13301/40000 [10:21:03<30:46:46,  4.15s/it]

MarioNet saved to checkpoints2/mario_net_5.chkpt at step 2578545


 33%|███▎      | 13320/40000 [10:21:58<16:42:06,  2.25s/it]

Episode 13320 - Step 2582635 - Epsilon 0.5243169921074392 - Mean Reward 725.35 - Mean Length 203.2 - Mean Loss 2.294 - Mean Q Value 42.977 - Time Delta 58.43 - Time 2023-11-03T20:42:19


 33%|███▎      | 13340/40000 [10:22:50<29:39:44,  4.01s/it]

Episode 13340 - Step 2586149 - Epsilon 0.5238565818370787 - Mean Reward 713.88 - Mean Length 210.05 - Mean Loss 2.236 - Mean Q Value 42.498 - Time Delta 50.365 - Time 2023-11-03T20:43:10


 33%|███▎      | 13360/40000 [10:23:49<27:26:47,  3.71s/it]

Episode 13360 - Step 2590224 - Epsilon 0.523323174628013 - Mean Reward 706.1 - Mean Length 213.43 - Mean Loss 2.205 - Mean Q Value 41.879 - Time Delta 58.561 - Time 2023-11-03T20:44:08


 33%|███▎      | 13380/40000 [10:24:44<30:19:57,  4.10s/it]

Episode 13380 - Step 2594130 - Epsilon 0.5228123989109018 - Mean Reward 716.1 - Mean Length 208.66 - Mean Loss 2.125 - Mean Q Value 41.357 - Time Delta 55.426 - Time 2023-11-03T20:45:04


 34%|███▎      | 13400/40000 [10:25:31<17:16:13,  2.34s/it]

Episode 13400 - Step 2597431 - Epsilon 0.522381125903232 - Mean Reward 690.55 - Mean Length 188.86 - Mean Loss 2.048 - Mean Q Value 40.882 - Time Delta 46.636 - Time 2023-11-03T20:45:50


 34%|███▎      | 13401/40000 [10:25:32<14:20:44,  1.94s/it]

MarioNet saved to checkpoints2/mario_net_5.chkpt at step 2597431


 34%|███▎      | 13420/40000 [10:26:04<9:58:37,  1.35s/it] 

Episode 13420 - Step 2599891 - Epsilon 0.5220599602391758 - Mean Reward 657.45 - Mean Length 172.56 - Mean Loss 2.008 - Mean Q Value 40.764 - Time Delta 35.504 - Time 2023-11-03T20:46:26


 34%|███▎      | 13440/40000 [10:27:08<23:40:07,  3.21s/it]

Episode 13440 - Step 2604457 - Epsilon 0.5214643687185637 - Mean Reward 676.29 - Mean Length 183.08 - Mean Loss 2.023 - Mean Q Value 40.559 - Time Delta 65.49 - Time 2023-11-03T20:47:31


 34%|███▎      | 13460/40000 [10:28:13<13:45:40,  1.87s/it]

Episode 13460 - Step 2608802 - Epsilon 0.5208982355139629 - Mean Reward 680.19 - Mean Length 185.78 - Mean Loss 2.004 - Mean Q Value 40.341 - Time Delta 62.087 - Time 2023-11-03T20:48:33


 34%|███▎      | 13480/40000 [10:29:26<20:54:37,  2.84s/it]

Episode 13480 - Step 2614036 - Epsilon 0.5202170858269245 - Mean Reward 708.74 - Mean Length 199.06 - Mean Loss 2.028 - Mean Q Value 40.175 - Time Delta 75.085 - Time 2023-11-03T20:49:49


 34%|███▍      | 13500/40000 [10:30:19<12:58:31,  1.76s/it]

Episode 13500 - Step 2617574 - Epsilon 0.5197571571903787 - Mean Reward 718.63 - Mean Length 201.43 - Mean Loss 2.073 - Mean Q Value 39.822 - Time Delta 51.094 - Time 2023-11-03T20:50:40


 34%|███▍      | 13501/40000 [10:30:22<13:33:03,  1.84s/it]

MarioNet saved to checkpoints2/mario_net_5.chkpt at step 2617574


 34%|███▍      | 13520/40000 [10:31:27<19:45:37,  2.69s/it]

Episode 13520 - Step 2622353 - Epsilon 0.519136548059365 - Mean Reward 755.44 - Mean Length 224.62 - Mean Loss 2.14 - Mean Q Value 39.478 - Time Delta 67.77 - Time 2023-11-03T20:51:47


 34%|███▍      | 13540/40000 [10:32:33<22:02:44,  3.00s/it]

Episode 13540 - Step 2627061 - Epsilon 0.5185258837123402 - Mean Reward 779.46 - Mean Length 226.04 - Mean Loss 2.195 - Mean Q Value 39.16 - Time Delta 65.997 - Time 2023-11-03T20:52:53


 34%|███▍      | 13560/40000 [10:33:36<21:38:02,  2.95s/it]

Episode 13560 - Step 2631546 - Epsilon 0.5179448123165424 - Mean Reward 810.49 - Mean Length 227.44 - Mean Loss 2.269 - Mean Q Value 38.934 - Time Delta 63.033 - Time 2023-11-03T20:53:56


 34%|███▍      | 13580/40000 [10:34:19<14:52:50,  2.03s/it]

Episode 13580 - Step 2634880 - Epsilon 0.5175132851254156 - Mean Reward 777.41 - Mean Length 208.44 - Mean Loss 2.302 - Mean Q Value 38.687 - Time Delta 46.732 - Time 2023-11-03T20:54:43


 34%|███▍      | 13600/40000 [10:35:32<25:21:14,  3.46s/it]

Episode 13600 - Step 2639755 - Epsilon 0.5168829499188216 - Mean Reward 788.09 - Mean Length 221.81 - Mean Loss 2.312 - Mean Q Value 38.578 - Time Delta 68.295 - Time 2023-11-03T20:55:51


 34%|███▍      | 13601/40000 [10:35:33<19:56:25,  2.72s/it]

MarioNet saved to checkpoints2/mario_net_5.chkpt at step 2639755


 34%|███▍      | 13620/40000 [10:36:22<15:47:53,  2.16s/it]

Episode 13620 - Step 2643597 - Epsilon 0.5163867221348698 - Mean Reward 803.02 - Mean Length 212.44 - Mean Loss 2.299 - Mean Q Value 38.457 - Time Delta 53.58 - Time 2023-11-03T20:56:45


 34%|███▍      | 13640/40000 [10:37:19<20:43:11,  2.83s/it]

Episode 13640 - Step 2648003 - Epsilon 0.5158182352409345 - Mean Reward 798.11 - Mean Length 209.42 - Mean Loss 2.257 - Mean Q Value 38.378 - Time Delta 62.125 - Time 2023-11-03T20:57:47


 34%|███▍      | 13660/40000 [10:38:13<16:13:07,  2.22s/it]

Episode 13660 - Step 2651288 - Epsilon 0.5153947933618741 - Mean Reward 759.81 - Mean Length 197.42 - Mean Loss 2.217 - Mean Q Value 38.339 - Time Delta 46.743 - Time 2023-11-03T20:58:34


 34%|███▍      | 13680/40000 [10:39:03<15:05:15,  2.06s/it]

Episode 13680 - Step 2654698 - Epsilon 0.5149556064755811 - Mean Reward 771.58 - Mean Length 198.18 - Mean Loss 2.14 - Mean Q Value 38.287 - Time Delta 49.053 - Time 2023-11-03T20:59:23


 34%|███▍      | 13700/40000 [10:40:28<32:58:46,  4.51s/it]

Episode 13700 - Step 2660554 - Epsilon 0.5142022629554389 - Mean Reward 789.37 - Mean Length 207.99 - Mean Loss 2.085 - Mean Q Value 38.19 - Time Delta 84.274 - Time 2023-11-03T21:00:47


 34%|███▍      | 13701/40000 [10:40:29<25:32:35,  3.50s/it]

MarioNet saved to checkpoints2/mario_net_5.chkpt at step 2660554


 34%|███▍      | 13720/40000 [10:41:10<23:43:53,  3.25s/it]

Episode 13720 - Step 2663482 - Epsilon 0.5138260045792377 - Mean Reward 760.43 - Mean Length 198.85 - Mean Loss 2.037 - Mean Q Value 38.135 - Time Delta 43.604 - Time 2023-11-03T21:01:31


 34%|███▍      | 13740/40000 [10:42:00<7:45:40,  1.06s/it]

Episode 13740 - Step 2666870 - Epsilon 0.5133909781586166 - Mean Reward 733.78 - Mean Length 188.67 - Mean Loss 1.997 - Mean Q Value 37.953 - Time Delta 49.792 - Time 2023-11-03T21:02:21


 34%|███▍      | 13760/40000 [10:43:01<33:04:17,  4.54s/it]

Episode 13760 - Step 2670947 - Epsilon 0.5128679709215206 - Mean Reward 733.53 - Mean Length 196.59 - Mean Loss 1.977 - Mean Q Value 37.788 - Time Delta 59.963 - Time 2023-11-03T21:03:21


 34%|███▍      | 13780/40000 [10:44:07<35:11:01,  4.83s/it]

Episode 13780 - Step 2675405 - Epsilon 0.5122966978974619 - Mean Reward 766.06 - Mean Length 207.07 - Mean Loss 2.018 - Mean Q Value 37.73 - Time Delta 66.252 - Time 2023-11-03T21:04:27


 34%|███▍      | 13800/40000 [10:45:12<25:05:05,  3.45s/it]

Episode 13800 - Step 2680316 - Epsilon 0.5116681115003893 - Mean Reward 774.96 - Mean Length 197.62 - Mean Loss 2.032 - Mean Q Value 37.821 - Time Delta 74.244 - Time 2023-11-03T21:05:41


 35%|███▍      | 13801/40000 [10:45:23<41:22:03,  5.68s/it]

MarioNet saved to checkpoints2/mario_net_5.chkpt at step 2680316


 35%|███▍      | 13820/40000 [10:46:25<23:54:48,  3.29s/it]

Episode 13820 - Step 2684675 - Epsilon 0.5111108248128817 - Mean Reward 814.47 - Mean Length 211.93 - Mean Loss 2.051 - Mean Q Value 38.009 - Time Delta 63.318 - Time 2023-11-03T21:06:44


 35%|███▍      | 13840/40000 [10:47:09<14:28:40,  1.99s/it]

Episode 13840 - Step 2687733 - Epsilon 0.5107202298623773 - Mean Reward 815.32 - Mean Length 208.63 - Mean Loss 2.076 - Mean Q Value 38.231 - Time Delta 44.326 - Time 2023-11-03T21:07:29


 35%|███▍      | 13860/40000 [10:47:59<16:56:38,  2.33s/it]

Episode 13860 - Step 2691260 - Epsilon 0.5102701007232151 - Mean Reward 825.6 - Mean Length 203.13 - Mean Loss 2.109 - Mean Q Value 38.446 - Time Delta 51.267 - Time 2023-11-03T21:08:20


 35%|███▍      | 13880/40000 [10:48:51<17:00:18,  2.34s/it]

Episode 13880 - Step 2694923 - Epsilon 0.5098030347105192 - Mean Reward 791.4 - Mean Length 195.18 - Mean Loss 2.13 - Mean Q Value 38.782 - Time Delta 52.574 - Time 2023-11-03T21:09:13


 35%|███▍      | 13900/40000 [10:49:41<17:26:43,  2.41s/it]

Episode 13900 - Step 2698225 - Epsilon 0.5093823659076605 - Mean Reward 717.14 - Mean Length 179.09 - Mean Loss 2.163 - Mean Q Value 38.978 - Time Delta 47.951 - Time 2023-11-03T21:10:01


 35%|███▍      | 13901/40000 [10:49:42<14:27:26,  1.99s/it]

MarioNet saved to checkpoints2/mario_net_5.chkpt at step 2698225


 35%|███▍      | 13920/40000 [10:50:55<24:31:36,  3.39s/it]

Episode 13920 - Step 2703416 - Epsilon 0.5087217436136415 - Mean Reward 684.98 - Mean Length 187.41 - Mean Loss 2.176 - Mean Q Value 38.94 - Time Delta 76.043 - Time 2023-11-03T21:11:17


 35%|███▍      | 13940/40000 [10:51:56<24:43:49,  3.42s/it]

Episode 13940 - Step 2707617 - Epsilon 0.5081877390038426 - Mean Reward 711.14 - Mean Length 198.84 - Mean Loss 2.198 - Mean Q Value 38.877 - Time Delta 61.452 - Time 2023-11-03T21:12:18


 35%|███▍      | 13960/40000 [10:52:50<13:51:38,  1.92s/it]

Episode 13960 - Step 2711119 - Epsilon 0.5077430152889026 - Mean Reward 707.64 - Mean Length 198.59 - Mean Loss 2.174 - Mean Q Value 38.849 - Time Delta 53.668 - Time 2023-11-03T21:13:12


 35%|███▍      | 13980/40000 [10:54:05<30:28:03,  4.22s/it]

Episode 13980 - Step 2716049 - Epsilon 0.507117607431058 - Mean Reward 728.16 - Mean Length 211.26 - Mean Loss 2.119 - Mean Q Value 38.655 - Time Delta 74.296 - Time 2023-11-03T21:14:26


 35%|███▌      | 14000/40000 [10:54:58<18:10:34,  2.52s/it]

Episode 14000 - Step 2719607 - Epsilon 0.5066667268217034 - Mean Reward 750.99 - Mean Length 213.82 - Mean Loss 2.053 - Mean Q Value 38.577 - Time Delta 52.4 - Time 2023-11-03T21:15:18


 35%|███▌      | 14001/40000 [10:55:00<17:01:45,  2.36s/it]

MarioNet saved to checkpoints2/mario_net_5.chkpt at step 2719607


 35%|███▌      | 14020/40000 [10:56:06<32:32:19,  4.51s/it]

Episode 14020 - Step 2724272 - Epsilon 0.5060761711123776 - Mean Reward 789.26 - Mean Length 208.56 - Mean Loss 1.991 - Mean Q Value 38.671 - Time Delta 68.393 - Time 2023-11-03T21:16:27


 35%|███▌      | 14040/40000 [10:56:56<13:52:24,  1.92s/it]

Episode 14040 - Step 2727767 - Epsilon 0.5056341801254904 - Mean Reward 786.06 - Mean Length 201.5 - Mean Loss 1.92 - Mean Q Value 38.878 - Time Delta 51.043 - Time 2023-11-03T21:17:18


 35%|███▌      | 14060/40000 [10:57:50<22:06:51,  3.07s/it]

Episode 14060 - Step 2731484 - Epsilon 0.5051645377462238 - Mean Reward 794.88 - Mean Length 203.65 - Mean Loss 1.876 - Mean Q Value 39.071 - Time Delta 54.121 - Time 2023-11-03T21:18:12


 35%|███▌      | 14080/40000 [10:58:57<31:02:23,  4.31s/it]

Episode 14080 - Step 2735915 - Epsilon 0.504605251491412 - Mean Reward 778.04 - Mean Length 198.66 - Mean Loss 1.883 - Mean Q Value 39.24 - Time Delta 64.417 - Time 2023-11-03T21:19:16


 35%|███▌      | 14100/40000 [10:59:41<17:08:16,  2.38s/it]

Episode 14100 - Step 2739097 - Epsilon 0.5042039975835421 - Mean Reward 774.92 - Mean Length 194.9 - Mean Loss 1.888 - Mean Q Value 39.335 - Time Delta 46.287 - Time 2023-11-03T21:20:03


 35%|███▌      | 14101/40000 [10:59:44<18:53:40,  2.63s/it]

MarioNet saved to checkpoints2/mario_net_5.chkpt at step 2739097


 35%|███▌      | 14120/40000 [11:00:27<9:54:43,  1.38s/it] 

Episode 14120 - Step 2742018 - Epsilon 0.5038359369727323 - Mean Reward 719.76 - Mean Length 177.46 - Mean Loss 1.916 - Mean Q Value 39.443 - Time Delta 43.3 - Time 2023-11-03T21:20:46


 35%|███▌      | 14140/40000 [11:01:08<11:58:36,  1.67s/it]

Episode 14140 - Step 2744945 - Epsilon 0.5034673898383613 - Mean Reward 697.03 - Mean Length 171.78 - Mean Loss 1.934 - Mean Q Value 39.435 - Time Delta 42.613 - Time 2023-11-03T21:21:29


 35%|███▌      | 14160/40000 [11:02:07<37:07:44,  5.17s/it]

Episode 14160 - Step 2749141 - Epsilon 0.502939529391573 - Mean Reward 685.92 - Mean Length 176.57 - Mean Loss 1.926 - Mean Q Value 39.396 - Time Delta 60.212 - Time 2023-11-03T21:22:29


 35%|███▌      | 14180/40000 [11:03:05<14:37:17,  2.04s/it]

Episode 14180 - Step 2753228 - Epsilon 0.5024259133014435 - Mean Reward 706.39 - Mean Length 173.13 - Mean Loss 1.932 - Mean Q Value 39.35 - Time Delta 58.347 - Time 2023-11-03T21:23:27


 36%|███▌      | 14200/40000 [11:04:02<19:09:58,  2.67s/it]

Episode 14200 - Step 2756979 - Epsilon 0.5019549841837146 - Mean Reward 735.98 - Mean Length 178.82 - Mean Loss 1.93 - Mean Q Value 39.255 - Time Delta 54.614 - Time 2023-11-03T21:24:22


 36%|███▌      | 14201/40000 [11:04:03<15:44:34,  2.20s/it]

MarioNet saved to checkpoints2/mario_net_5.chkpt at step 2756979


 36%|███▌      | 14220/40000 [11:05:06<18:42:45,  2.61s/it]

Episode 14220 - Step 2761611 - Epsilon 0.5013740566612831 - Mean Reward 776.58 - Mean Length 195.93 - Mean Loss 1.939 - Mean Q Value 39.213 - Time Delta 66.952 - Time 2023-11-03T21:25:29


 36%|███▌      | 14240/40000 [11:06:01<20:17:54,  2.84s/it]

Episode 14240 - Step 2765127 - Epsilon 0.5009335424447117 - Mean Reward 791.66 - Mean Length 201.82 - Mean Loss 1.944 - Mean Q Value 39.237 - Time Delta 51.334 - Time 2023-11-03T21:26:20


 36%|███▌      | 14260/40000 [11:07:01<14:44:33,  2.06s/it]

Episode 14260 - Step 2769751 - Epsilon 0.5003547977760886 - Mean Reward 825.4 - Mean Length 206.1 - Mean Loss 1.959 - Mean Q Value 39.235 - Time Delta 66.722 - Time 2023-11-03T21:27:27


 36%|███▌      | 14280/40000 [11:07:52<10:05:48,  1.41s/it]

Episode 14280 - Step 2772897 - Epsilon 0.4999614233931668 - Mean Reward 801.37 - Mean Length 196.69 - Mean Loss 1.943 - Mean Q Value 39.304 - Time Delta 46.221 - Time 2023-11-03T21:28:13


 36%|███▌      | 14300/40000 [11:09:29<43:13:52,  6.06s/it]

Episode 14300 - Step 2779627 - Epsilon 0.49912094544347335 - Mean Reward 819.65 - Mean Length 226.48 - Mean Loss 1.924 - Mean Q Value 39.431 - Time Delta 97.787 - Time 2023-11-03T21:29:51


 36%|███▌      | 14301/40000 [11:09:32<36:37:25,  5.13s/it]

MarioNet saved to checkpoints2/mario_net_5.chkpt at step 2779627


 36%|███▌      | 14320/40000 [11:10:21<20:25:18,  2.86s/it]

Episode 14320 - Step 2783124 - Epsilon 0.4986847795891171 - Mean Reward 818.65 - Mean Length 215.13 - Mean Loss 1.889 - Mean Q Value 39.321 - Time Delta 50.628 - Time 2023-11-03T21:30:41


 36%|███▌      | 14340/40000 [11:11:08<16:34:51,  2.33s/it]

Episode 14340 - Step 2786570 - Epsilon 0.49825534760215107 - Mean Reward 811.26 - Mean Length 214.43 - Mean Loss 1.88 - Mean Q Value 39.316 - Time Delta 49.723 - Time 2023-11-03T21:31:31


 36%|███▌      | 14360/40000 [11:12:10<10:56:42,  1.54s/it]

Episode 14360 - Step 2790753 - Epsilon 0.49773456935661153 - Mean Reward 783.06 - Mean Length 210.02 - Mean Loss 1.88 - Mean Q Value 39.136 - Time Delta 60.517 - Time 2023-11-03T21:32:32


 36%|███▌      | 14380/40000 [11:13:16<22:42:07,  3.19s/it]

Episode 14380 - Step 2795330 - Epsilon 0.4971653622241315 - Mean Reward 786.04 - Mean Length 224.33 - Mean Loss 1.849 - Mean Q Value 38.966 - Time Delta 66.412 - Time 2023-11-03T21:33:38


 36%|███▌      | 14400/40000 [11:14:11<15:41:27,  2.21s/it]

Episode 14400 - Step 2799145 - Epsilon 0.49669141674927525 - Mean Reward 757.59 - Mean Length 195.18 - Mean Loss 1.83 - Mean Q Value 38.759 - Time Delta 54.99 - Time 2023-11-03T21:34:33


 36%|███▌      | 14401/40000 [11:14:15<18:39:26,  2.62s/it]

MarioNet saved to checkpoints2/mario_net_5.chkpt at step 2799145


 36%|███▌      | 14420/40000 [11:14:44<10:13:48,  1.44s/it]

Episode 14420 - Step 2801306 - Epsilon 0.4964231516494404 - Mean Reward 703.99 - Mean Length 181.82 - Mean Loss 1.811 - Mean Q Value 38.717 - Time Delta 31.18 - Time 2023-11-03T21:35:04


 36%|███▌      | 14440/40000 [11:15:33<16:56:51,  2.39s/it]

Episode 14440 - Step 2804778 - Epsilon 0.4959924432543952 - Mean Reward 699.81 - Mean Length 182.08 - Mean Loss 1.785 - Mean Q Value 38.679 - Time Delta 50.032 - Time 2023-11-03T21:35:54


 36%|███▌      | 14460/40000 [11:16:21<19:50:40,  2.80s/it]

Episode 14460 - Step 2808034 - Epsilon 0.4955888696318317 - Mean Reward 681.95 - Mean Length 172.81 - Mean Loss 1.738 - Mean Q Value 38.795 - Time Delta 47.338 - Time 2023-11-03T21:36:42


 36%|███▌      | 14480/40000 [11:17:03<14:15:42,  2.01s/it]

Episode 14480 - Step 2810993 - Epsilon 0.4952223932868112 - Mean Reward 672.01 - Mean Length 156.63 - Mean Loss 1.736 - Mean Q Value 38.972 - Time Delta 42.799 - Time 2023-11-03T21:37:24


 36%|███▋      | 14500/40000 [11:18:07<22:57:48,  3.24s/it]

Episode 14500 - Step 2815717 - Epsilon 0.4946378807897595 - Mean Reward 716.41 - Mean Length 165.72 - Mean Loss 1.739 - Mean Q Value 39.191 - Time Delta 67.888 - Time 2023-11-03T21:38:32


 36%|███▋      | 14501/40000 [11:18:14<30:46:32,  4.34s/it]

MarioNet saved to checkpoints2/mario_net_5.chkpt at step 2815717


 36%|███▋      | 14520/40000 [11:19:02<14:47:11,  2.09s/it]

Episode 14520 - Step 2819216 - Epsilon 0.49420538543964915 - Mean Reward 753.66 - Mean Length 179.1 - Mean Loss 1.725 - Mean Q Value 39.355 - Time Delta 50.668 - Time 2023-11-03T21:39:23


 36%|███▋      | 14540/40000 [11:20:11<30:29:15,  4.31s/it]

Episode 14540 - Step 2823991 - Epsilon 0.49361577967795467 - Mean Reward 785.81 - Mean Length 192.13 - Mean Loss 1.718 - Mean Q Value 39.438 - Time Delta 68.306 - Time 2023-11-03T21:40:31


 36%|███▋      | 14560/40000 [11:21:00<16:56:09,  2.40s/it]

Episode 14560 - Step 2827514 - Epsilon 0.4931812189234059 - Mean Reward 818.37 - Mean Length 194.8 - Mean Loss 1.723 - Mean Q Value 39.506 - Time Delta 50.495 - Time 2023-11-03T21:41:22


 36%|███▋      | 14580/40000 [11:21:53<19:29:52,  2.76s/it]

Episode 14580 - Step 2831056 - Epsilon 0.4927447001966143 - Mean Reward 830.12 - Mean Length 200.63 - Mean Loss 1.712 - Mean Q Value 39.526 - Time Delta 50.343 - Time 2023-11-03T21:42:12


 36%|███▋      | 14600/40000 [11:22:35<19:39:58,  2.79s/it]

Episode 14600 - Step 2834137 - Epsilon 0.4923653096753431 - Mean Reward 760.88 - Mean Length 184.2 - Mean Loss 1.739 - Mean Q Value 39.509 - Time Delta 44.094 - Time 2023-11-03T21:42:56


 37%|███▋      | 14601/40000 [11:22:38<20:30:21,  2.91s/it]

MarioNet saved to checkpoints2/mario_net_5.chkpt at step 2834137


 37%|███▋      | 14620/40000 [11:23:35<19:20:38,  2.74s/it]

Episode 14620 - Step 2838414 - Epsilon 0.49183912936140933 - Mean Reward 798.76 - Mean Length 191.98 - Mean Loss 1.739 - Mean Q Value 39.546 - Time Delta 61.059 - Time 2023-11-03T21:43:57


 37%|███▋      | 14640/40000 [11:24:40<33:22:07,  4.74s/it]

Episode 14640 - Step 2842922 - Epsilon 0.49128513882559666 - Mean Reward 781.89 - Mean Length 189.31 - Mean Loss 1.755 - Mean Q Value 39.671 - Time Delta 64.039 - Time 2023-11-03T21:45:01


 37%|███▋      | 14660/40000 [11:25:37<24:51:02,  3.53s/it]

Episode 14660 - Step 2846889 - Epsilon 0.49079814825458795 - Mean Reward 782.58 - Mean Length 193.75 - Mean Loss 1.776 - Mean Q Value 39.737 - Time Delta 56.704 - Time 2023-11-03T21:45:58


 37%|███▋      | 14680/40000 [11:26:34<23:14:10,  3.30s/it]

Episode 14680 - Step 2850872 - Epsilon 0.49030967917424123 - Mean Reward 780.13 - Mean Length 198.16 - Mean Loss 1.798 - Mean Q Value 39.756 - Time Delta 56.721 - Time 2023-11-03T21:46:55


 37%|███▋      | 14700/40000 [11:27:12<9:34:08,  1.36s/it] 

Episode 14700 - Step 2853580 - Epsilon 0.4899778518161509 - Mean Reward 773.02 - Mean Length 194.43 - Mean Loss 1.827 - Mean Q Value 39.867 - Time Delta 39.091 - Time 2023-11-03T21:47:34


 37%|███▋      | 14701/40000 [11:27:15<13:32:32,  1.93s/it]

MarioNet saved to checkpoints2/mario_net_5.chkpt at step 2853580


 37%|███▋      | 14720/40000 [11:28:04<15:37:55,  2.23s/it]

Episode 14720 - Step 2857390 - Epsilon 0.4895113700511292 - Mean Reward 737.88 - Mean Length 189.76 - Mean Loss 1.875 - Mean Q Value 39.845 - Time Delta 54.048 - Time 2023-11-03T21:48:28


 37%|███▋      | 14740/40000 [11:28:54<26:19:21,  3.75s/it]

Episode 14740 - Step 2860596 - Epsilon 0.4891191838285793 - Mean Reward 714.69 - Mean Length 176.74 - Mean Loss 1.899 - Mean Q Value 39.714 - Time Delta 46.461 - Time 2023-11-03T21:49:14


 37%|███▋      | 14760/40000 [11:30:04<13:42:07,  1.95s/it]

Episode 14760 - Step 2865879 - Epsilon 0.48847360600304773 - Mean Reward 726.54 - Mean Length 189.9 - Mean Loss 1.943 - Mean Q Value 39.647 - Time Delta 74.665 - Time 2023-11-03T21:50:29


 37%|███▋      | 14780/40000 [11:30:55<20:38:36,  2.95s/it]

Episode 14780 - Step 2869087 - Epsilon 0.48808200717414835 - Mean Reward 728.44 - Mean Length 182.15 - Mean Loss 1.969 - Mean Q Value 39.533 - Time Delta 45.435 - Time 2023-11-03T21:51:14


 37%|███▋      | 14800/40000 [11:31:47<22:47:44,  3.26s/it]

Episode 14800 - Step 2872660 - Epsilon 0.48764622252799983 - Mean Reward 741.32 - Mean Length 190.8 - Mean Loss 1.982 - Mean Q Value 39.427 - Time Delta 52.031 - Time 2023-11-03T21:52:06


 37%|███▋      | 14801/40000 [11:31:48<18:07:26,  2.59s/it]

MarioNet saved to checkpoints2/mario_net_5.chkpt at step 2872660


 37%|███▋      | 14820/40000 [11:32:30<13:56:07,  1.99s/it]

Episode 14820 - Step 2875642 - Epsilon 0.4872828176996108 - Mean Reward 731.54 - Mean Length 182.52 - Mean Loss 1.962 - Mean Q Value 39.452 - Time Delta 43.003 - Time 2023-11-03T21:52:49


 37%|███▋      | 14840/40000 [11:33:32<24:55:24,  3.57s/it]

Episode 14840 - Step 2879891 - Epsilon 0.4867654762833755 - Mean Reward 761.18 - Mean Length 192.95 - Mean Loss 1.963 - Mean Q Value 39.6 - Time Delta 61.757 - Time 2023-11-03T21:53:51


 37%|███▋      | 14860/40000 [11:34:10<10:17:43,  1.47s/it]

Episode 14860 - Step 2882745 - Epsilon 0.4864182929449861 - Mean Reward 710.33 - Mean Length 168.66 - Mean Loss 1.964 - Mean Q Value 39.7 - Time Delta 42.137 - Time 2023-11-03T21:54:33


 37%|███▋      | 14880/40000 [11:35:01<18:35:03,  2.66s/it]

Episode 14880 - Step 2885933 - Epsilon 0.48603077196463734 - Mean Reward 711.82 - Mean Length 168.46 - Mean Loss 1.961 - Mean Q Value 39.827 - Time Delta 46.62 - Time 2023-11-03T21:55:20


 37%|███▋      | 14900/40000 [11:35:44<21:13:56,  3.05s/it]

Episode 14900 - Step 2889099 - Epsilon 0.4856462307627599 - Mean Reward 711.64 - Mean Length 164.39 - Mean Loss 1.927 - Mean Q Value 40.052 - Time Delta 45.458 - Time 2023-11-03T21:56:05


 37%|███▋      | 14901/40000 [11:35:47<20:36:24,  2.96s/it]

MarioNet saved to checkpoints2/mario_net_5.chkpt at step 2889099


 37%|███▋      | 14920/40000 [11:36:28<13:22:49,  1.92s/it]

Episode 14920 - Step 2892209 - Epsilon 0.48526878752118363 - Mean Reward 717.46 - Mean Length 165.67 - Mean Loss 1.961 - Mean Q Value 40.224 - Time Delta 44.393 - Time 2023-11-03T21:56:50


 37%|███▋      | 14940/40000 [11:37:18<18:58:59,  2.73s/it]

Episode 14940 - Step 2895686 - Epsilon 0.4848471508552509 - Mean Reward 713.37 - Mean Length 157.95 - Mean Loss 1.985 - Mean Q Value 40.373 - Time Delta 50.561 - Time 2023-11-03T21:57:40


 37%|███▋      | 14960/40000 [11:38:03<21:42:42,  3.12s/it]

Episode 14960 - Step 2898670 - Epsilon 0.4844855897145405 - Mean Reward 729.5 - Mean Length 159.25 - Mean Loss 1.992 - Mean Q Value 40.626 - Time Delta 43.446 - Time 2023-11-03T21:58:24


 37%|███▋      | 14980/40000 [11:38:54<18:19:12,  2.64s/it]

Episode 14980 - Step 2902333 - Epsilon 0.48404212506263355 - Mean Reward 726.1 - Mean Length 164.0 - Mean Loss 2.011 - Mean Q Value 40.871 - Time Delta 53.241 - Time 2023-11-03T21:59:17


 38%|███▊      | 15000/40000 [11:39:40<10:26:06,  1.50s/it]

Episode 15000 - Step 2905448 - Epsilon 0.4836653239465651 - Mean Reward 731.06 - Mean Length 163.49 - Mean Loss 2.031 - Mean Q Value 41.067 - Time Delta 44.786 - Time 2023-11-03T22:00:02


 38%|███▊      | 15001/40000 [11:39:44<15:10:22,  2.18s/it]

MarioNet saved to checkpoints2/mario_net_5.chkpt at step 2905448


 38%|███▊      | 15020/40000 [11:40:32<19:54:56,  2.87s/it]

Episode 15020 - Step 2909032 - Epsilon 0.4832321538512853 - Mean Reward 742.38 - Mean Length 168.23 - Mean Loss 2.013 - Mean Q Value 41.219 - Time Delta 51.505 - Time 2023-11-03T22:00:53


 38%|███▊      | 15040/40000 [11:41:23<16:17:08,  2.35s/it]

Episode 15040 - Step 2912592 - Epsilon 0.4828022685079152 - Mean Reward 766.27 - Mean Length 169.06 - Mean Loss 1.998 - Mean Q Value 41.245 - Time Delta 51.271 - Time 2023-11-03T22:01:45


 38%|███▊      | 15060/40000 [11:42:11<13:58:09,  2.02s/it]

Episode 15060 - Step 2915980 - Epsilon 0.4823935080698201 - Mean Reward 781.33 - Mean Length 173.1 - Mean Loss 2.0 - Mean Q Value 41.267 - Time Delta 49.158 - Time 2023-11-03T22:02:34


 38%|███▊      | 15080/40000 [11:42:56<14:55:31,  2.16s/it]

Episode 15080 - Step 2918900 - Epsilon 0.4820414892673745 - Mean Reward 764.57 - Mean Length 165.67 - Mean Loss 1.98 - Mean Q Value 41.377 - Time Delta 42.697 - Time 2023-11-03T22:03:16


 38%|███▊      | 15100/40000 [11:43:50<25:36:34,  3.70s/it]

Episode 15100 - Step 2922823 - Epsilon 0.48156895877266104 - Mean Reward 794.55 - Mean Length 173.75 - Mean Loss 1.973 - Mean Q Value 41.355 - Time Delta 55.62 - Time 2023-11-03T22:04:12


 38%|███▊      | 15101/40000 [11:43:54<25:59:05,  3.76s/it]

MarioNet saved to checkpoints2/mario_net_5.chkpt at step 2922823


 38%|███▊      | 15120/40000 [11:44:49<29:33:48,  4.28s/it]

Episode 15120 - Step 2926724 - Epsilon 0.4810995375256179 - Mean Reward 767.4 - Mean Length 176.92 - Mean Loss 1.974 - Mean Q Value 41.453 - Time Delta 56.068 - Time 2023-11-03T22:05:08


 38%|███▊      | 15140/40000 [11:45:33<18:39:36,  2.70s/it]

Episode 15140 - Step 2929895 - Epsilon 0.4807182969537321 - Mean Reward 745.7 - Mean Length 173.03 - Mean Loss 1.953 - Mean Q Value 41.51 - Time Delta 44.775 - Time 2023-11-03T22:05:53


 38%|███▊      | 15160/40000 [11:46:25<23:24:50,  3.39s/it]

Episode 15160 - Step 2933756 - Epsilon 0.48025450743198755 - Mean Reward 724.47 - Mean Length 177.76 - Mean Loss 1.968 - Mean Q Value 41.505 - Time Delta 54.458 - Time 2023-11-03T22:06:47


 38%|███▊      | 15180/40000 [11:47:09<16:35:12,  2.41s/it]

Episode 15180 - Step 2936762 - Epsilon 0.4798937317029652 - Mean Reward 736.27 - Mean Length 178.62 - Mean Loss 2.005 - Mean Q Value 41.437 - Time Delta 43.437 - Time 2023-11-03T22:07:31


 38%|███▊      | 15200/40000 [11:47:58<21:06:12,  3.06s/it]

Episode 15200 - Step 2940066 - Epsilon 0.4794975030959386 - Mean Reward 707.74 - Mean Length 172.43 - Mean Loss 2.023 - Mean Q Value 41.504 - Time Delta 46.941 - Time 2023-11-03T22:08:18


 38%|███▊      | 15201/40000 [11:47:59<16:50:31,  2.44s/it]

MarioNet saved to checkpoints2/mario_net_5.chkpt at step 2940066


 38%|███▊      | 15220/40000 [11:48:36<12:43:32,  1.85s/it]

Episode 15220 - Step 2942763 - Epsilon 0.4791743108324669 - Mean Reward 707.19 - Mean Length 160.39 - Mean Loss 2.048 - Mean Q Value 41.518 - Time Delta 38.597 - Time 2023-11-03T22:08:56


 38%|███▊      | 15240/40000 [11:49:26<23:40:45,  3.44s/it]

Episode 15240 - Step 2946246 - Epsilon 0.4787572513527422 - Mean Reward 685.07 - Mean Length 163.51 - Mean Loss 2.105 - Mean Q Value 41.649 - Time Delta 49.244 - Time 2023-11-03T22:09:46


 38%|███▊      | 15260/40000 [11:50:15<18:06:18,  2.63s/it]

Episode 15260 - Step 2949759 - Epsilon 0.47833696732838493 - Mean Reward 711.52 - Mean Length 160.03 - Mean Loss 2.095 - Mean Q Value 41.844 - Time Delta 50.372 - Time 2023-11-03T22:10:36


 38%|███▊      | 15280/40000 [11:51:07<18:02:55,  2.63s/it]

Episode 15280 - Step 2953702 - Epsilon 0.4778656789282971 - Mean Reward 721.18 - Mean Length 169.4 - Mean Loss 2.119 - Mean Q Value 42.105 - Time Delta 55.732 - Time 2023-11-03T22:11:32


 38%|███▊      | 15300/40000 [11:51:55<10:42:09,  1.56s/it]

Episode 15300 - Step 2956789 - Epsilon 0.4774970283158028 - Mean Reward 720.07 - Mean Length 167.23 - Mean Loss 2.136 - Mean Q Value 42.231 - Time Delta 43.574 - Time 2023-11-03T22:12:15


 38%|███▊      | 15301/40000 [11:51:57<11:18:27,  1.65s/it]

MarioNet saved to checkpoints2/mario_net_5.chkpt at step 2956789


 38%|███▊      | 15320/40000 [11:52:53<17:30:41,  2.55s/it]

Episode 15320 - Step 2960901 - Epsilon 0.4770064135285056 - Mean Reward 741.65 - Mean Length 181.38 - Mean Loss 2.148 - Mean Q Value 42.349 - Time Delta 58.704 - Time 2023-11-03T22:13:14


 38%|███▊      | 15340/40000 [11:53:40<11:52:43,  1.73s/it]

Episode 15340 - Step 2964216 - Epsilon 0.47661125817895866 - Mean Reward 765.89 - Mean Length 179.7 - Mean Loss 2.18 - Mean Q Value 42.589 - Time Delta 47.231 - Time 2023-11-03T22:14:01


 38%|███▊      | 15360/40000 [11:54:35<15:22:42,  2.25s/it]

Episode 15360 - Step 2968031 - Epsilon 0.47615690683785006 - Mean Reward 765.45 - Mean Length 182.72 - Mean Loss 2.2 - Mean Q Value 42.747 - Time Delta 54.054 - Time 2023-11-03T22:14:55


 38%|███▊      | 15380/40000 [11:55:23<14:57:28,  2.19s/it]

Episode 15380 - Step 2971255 - Epsilon 0.4757732789457418 - Mean Reward 758.98 - Mean Length 175.53 - Mean Loss 2.208 - Mean Q Value 42.791 - Time Delta 47.542 - Time 2023-11-03T22:15:43


 38%|███▊      | 15400/40000 [11:56:10<12:58:15,  1.90s/it]

Episode 15400 - Step 2974476 - Epsilon 0.47539031667582404 - Mean Reward 750.34 - Mean Length 176.87 - Mean Loss 2.242 - Mean Q Value 42.895 - Time Delta 46.082 - Time 2023-11-03T22:16:29


 39%|███▊      | 15401/40000 [11:56:11<11:14:13,  1.64s/it]

MarioNet saved to checkpoints2/mario_net_5.chkpt at step 2974476


 39%|███▊      | 15420/40000 [11:56:53<15:11:48,  2.23s/it]

Episode 15420 - Step 2977892 - Epsilon 0.47498450660016317 - Mean Reward 739.59 - Mean Length 169.91 - Mean Loss 2.259 - Mean Q Value 42.978 - Time Delta 48.227 - Time 2023-11-03T22:17:17


 39%|███▊      | 15440/40000 [11:57:55<17:57:40,  2.63s/it]

Episode 15440 - Step 2981960 - Epsilon 0.47450169284893806 - Mean Reward 751.92 - Mean Length 177.44 - Mean Loss 2.236 - Mean Q Value 42.929 - Time Delta 57.695 - Time 2023-11-03T22:18:15


 39%|███▊      | 15460/40000 [11:58:44<18:10:07,  2.67s/it]

Episode 15460 - Step 2985491 - Epsilon 0.4740830112499622 - Mean Reward 716.01 - Mean Length 174.6 - Mean Loss 2.213 - Mean Q Value 42.904 - Time Delta 49.391 - Time 2023-11-03T22:19:04


 39%|███▊      | 15480/40000 [11:59:34<15:42:35,  2.31s/it]

Episode 15480 - Step 2989188 - Epsilon 0.4736450423995394 - Mean Reward 736.78 - Mean Length 179.33 - Mean Loss 2.155 - Mean Q Value 42.968 - Time Delta 51.879 - Time 2023-11-03T22:19:56


 39%|███▉      | 15500/40000 [12:00:16<12:33:51,  1.85s/it]

Episode 15500 - Step 2992164 - Epsilon 0.4732927815012159 - Mean Reward 737.51 - Mean Length 176.88 - Mean Loss 2.138 - Mean Q Value 43.047 - Time Delta 41.87 - Time 2023-11-03T22:20:38


 39%|███▉      | 15501/40000 [12:00:20<17:06:09,  2.51s/it]

MarioNet saved to checkpoints2/mario_net_5.chkpt at step 2992164


 39%|███▉      | 15520/40000 [12:01:22<25:19:29,  3.72s/it]

Episode 15520 - Step 2996739 - Epsilon 0.47275176226902255 - Mean Reward 774.47 - Mean Length 188.47 - Mean Loss 2.18 - Mean Q Value 43.048 - Time Delta 64.286 - Time 2023-11-03T22:21:42


 39%|███▉      | 15539/40000 [12:02:09<22:56:19,  3.38s/it]

MarioNet saved to checkpoints2/mario_net_6.chkpt at step 3000000


 39%|███▉      | 15540/40000 [12:02:11<19:16:40,  2.84s/it]

Episode 15540 - Step 3000491 - Epsilon 0.472308528969465 - Mean Reward 747.47 - Mean Length 185.31 - Mean Loss 2.214 - Mean Q Value 42.97 - Time Delta 52.345 - Time 2023-11-03T22:22:35


 39%|███▉      | 15560/40000 [12:02:55<11:29:23,  1.69s/it]

Episode 15560 - Step 3003442 - Epsilon 0.4719602118099202 - Mean Reward 760.25 - Mean Length 179.51 - Mean Loss 2.281 - Mean Q Value 42.955 - Time Delta 41.705 - Time 2023-11-03T22:23:16


 39%|███▉      | 15580/40000 [12:03:36<13:09:20,  1.94s/it]

Episode 15580 - Step 3006309 - Epsilon 0.471622055487037 - Mean Reward 730.57 - Mean Length 171.21 - Mean Loss 2.37 - Mean Q Value 42.87 - Time Delta 40.858 - Time 2023-11-03T22:23:57


 39%|███▉      | 15600/40000 [12:04:13<9:55:02,  1.46s/it] 

Episode 15600 - Step 3009007 - Epsilon 0.47130405362899797 - Mean Reward 716.46 - Mean Length 168.43 - Mean Loss 2.405 - Mean Q Value 42.738 - Time Delta 38.595 - Time 2023-11-03T22:24:36


 39%|███▉      | 15601/40000 [12:04:17<16:18:13,  2.41s/it]

MarioNet saved to checkpoints2/mario_net_6.chkpt at step 3009007


 39%|███▉      | 15620/40000 [12:04:54<15:41:33,  2.32s/it]

Episode 15620 - Step 3011865 - Epsilon 0.4709674271146981 - Mean Reward 672.83 - Mean Length 151.26 - Mean Loss 2.397 - Mean Q Value 42.69 - Time Delta 40.311 - Time 2023-11-03T22:25:16


 39%|███▉      | 15640/40000 [12:05:41<20:31:21,  3.03s/it]

Episode 15640 - Step 3015215 - Epsilon 0.47057315696882035 - Mean Reward 686.45 - Mean Length 147.24 - Mean Loss 2.406 - Mean Q Value 42.724 - Time Delta 47.51 - Time 2023-11-03T22:26:04


 39%|███▉      | 15660/40000 [12:06:57<33:54:56,  5.02s/it]

Episode 15660 - Step 3020408 - Epsilon 0.4699626316843606 - Mean Reward 707.54 - Mean Length 169.66 - Mean Loss 2.404 - Mean Q Value 42.682 - Time Delta 73.229 - Time 2023-11-03T22:27:17


 39%|███▉      | 15680/40000 [12:07:57<18:38:50,  2.76s/it]

Episode 15680 - Step 3024917 - Epsilon 0.469433164718421 - Mean Reward 735.63 - Mean Length 186.08 - Mean Loss 2.382 - Mean Q Value 42.636 - Time Delta 64.669 - Time 2023-11-03T22:28:21


 39%|███▉      | 15700/40000 [12:08:45<15:16:36,  2.26s/it]

Episode 15700 - Step 3028251 - Epsilon 0.46904205514424374 - Mean Reward 756.59 - Mean Length 192.44 - Mean Loss 2.361 - Mean Q Value 42.637 - Time Delta 47.348 - Time 2023-11-03T22:29:09


 39%|███▉      | 15701/40000 [12:08:51<21:46:20,  3.23s/it]

MarioNet saved to checkpoints2/mario_net_6.chkpt at step 3028251


 39%|███▉      | 15720/40000 [12:09:45<34:17:24,  5.08s/it]

Episode 15720 - Step 3032213 - Epsilon 0.46857769894089 - Mean Reward 793.98 - Mean Length 203.48 - Mean Loss 2.37 - Mean Q Value 42.509 - Time Delta 56.228 - Time 2023-11-03T22:30:05


 39%|███▉      | 15740/40000 [12:10:32<18:44:58,  2.78s/it]

Episode 15740 - Step 3035744 - Epsilon 0.4681642444407392 - Mean Reward 806.18 - Mean Length 205.29 - Mean Loss 2.368 - Mean Q Value 42.409 - Time Delta 50.06 - Time 2023-11-03T22:30:55


 39%|███▉      | 15760/40000 [12:11:20<17:12:45,  2.56s/it]

Episode 15760 - Step 3038919 - Epsilon 0.4677927864671172 - Mean Reward 782.19 - Mean Length 185.11 - Mean Loss 2.367 - Mean Q Value 42.244 - Time Delta 45.101 - Time 2023-11-03T22:31:40


 39%|███▉      | 15780/40000 [12:12:07<16:43:23,  2.49s/it]

Episode 15780 - Step 3042405 - Epsilon 0.46738528259831863 - Mean Reward 772.01 - Mean Length 174.88 - Mean Loss 2.355 - Mean Q Value 42.202 - Time Delta 49.637 - Time 2023-11-03T22:32:30


 40%|███▉      | 15800/40000 [12:12:56<14:15:13,  2.12s/it]

Episode 15800 - Step 3045900 - Epsilon 0.4669770830147301 - Mean Reward 780.6 - Mean Length 176.49 - Mean Loss 2.329 - Mean Q Value 42.044 - Time Delta 49.74 - Time 2023-11-03T22:33:20


 40%|███▉      | 15801/40000 [12:13:01<19:46:41,  2.94s/it]

MarioNet saved to checkpoints2/mario_net_6.chkpt at step 3045900


 40%|███▉      | 15820/40000 [12:13:59<24:39:09,  3.67s/it]

Episode 15820 - Step 3050028 - Epsilon 0.46649541119019367 - Mean Reward 792.62 - Mean Length 178.15 - Mean Loss 2.282 - Mean Q Value 42.063 - Time Delta 58.68 - Time 2023-11-03T22:34:18


 40%|███▉      | 15840/40000 [12:14:39<8:38:11,  1.29s/it] 

Episode 15840 - Step 3052818 - Epsilon 0.46617014405028556 - Mean Reward 734.71 - Mean Length 170.74 - Mean Loss 2.25 - Mean Q Value 42.068 - Time Delta 39.778 - Time 2023-11-03T22:34:58


 40%|███▉      | 15860/40000 [12:15:14<16:10:04,  2.41s/it]

Episode 15860 - Step 3055377 - Epsilon 0.46587200704006554 - Mean Reward 734.11 - Mean Length 164.58 - Mean Loss 2.218 - Mean Q Value 42.119 - Time Delta 36.614 - Time 2023-11-03T22:35:35


 40%|███▉      | 15880/40000 [12:15:57<13:16:49,  1.98s/it]

Episode 15880 - Step 3058455 - Epsilon 0.4655136563789034 - Mean Reward 709.47 - Mean Length 160.5 - Mean Loss 2.196 - Mean Q Value 42.181 - Time Delta 43.632 - Time 2023-11-03T22:36:18


 40%|███▉      | 15900/40000 [12:16:44<21:35:05,  3.22s/it]

Episode 15900 - Step 3061848 - Epsilon 0.46511895179823476 - Mean Reward 716.25 - Mean Length 159.48 - Mean Loss 2.208 - Mean Q Value 42.337 - Time Delta 48.112 - Time 2023-11-03T22:37:06


 40%|███▉      | 15901/40000 [12:16:48<23:07:22,  3.45s/it]

MarioNet saved to checkpoints2/mario_net_6.chkpt at step 3061848


 40%|███▉      | 15920/40000 [12:17:28<11:53:51,  1.78s/it]

Episode 15920 - Step 3064884 - Epsilon 0.464766060408881 - Mean Reward 670.85 - Mean Length 148.56 - Mean Loss 2.222 - Mean Q Value 42.449 - Time Delta 42.984 - Time 2023-11-03T22:37:49


 40%|███▉      | 15940/40000 [12:18:27<14:21:19,  2.15s/it]

Episode 15940 - Step 3068901 - Epsilon 0.4642995533184165 - Mean Reward 712.89 - Mean Length 160.83 - Mean Loss 2.212 - Mean Q Value 42.486 - Time Delta 57.429 - Time 2023-11-03T22:38:47


 40%|███▉      | 15960/40000 [12:19:18<12:50:32,  1.92s/it]

Episode 15960 - Step 3072529 - Epsilon 0.46387862449095996 - Mean Reward 737.26 - Mean Length 171.52 - Mean Loss 2.206 - Mean Q Value 42.532 - Time Delta 51.839 - Time 2023-11-03T22:39:39


 40%|███▉      | 15980/40000 [12:20:08<22:09:15,  3.32s/it]

Episode 15980 - Step 3075926 - Epsilon 0.4634848427530715 - Mean Reward 735.54 - Mean Length 174.71 - Mean Loss 2.225 - Mean Q Value 42.489 - Time Delta 48.691 - Time 2023-11-03T22:40:27


 40%|████      | 16000/40000 [12:20:48<15:54:44,  2.39s/it]

Episode 16000 - Step 3078676 - Epsilon 0.4631663063932374 - Mean Reward 711.6 - Mean Length 168.28 - Mean Loss 2.221 - Mean Q Value 42.444 - Time Delta 39.623 - Time 2023-11-03T22:41:07


 40%|████      | 16001/40000 [12:20:49<13:26:30,  2.02s/it]

MarioNet saved to checkpoints2/mario_net_6.chkpt at step 3078676


 40%|████      | 16020/40000 [12:21:47<27:04:48,  4.07s/it]

Episode 16020 - Step 3082972 - Epsilon 0.46266913274799343 - Mean Reward 744.93 - Mean Length 180.88 - Mean Loss 2.229 - Mean Q Value 42.407 - Time Delta 61.549 - Time 2023-11-03T22:42:08


 40%|████      | 16040/40000 [12:22:55<17:21:36,  2.61s/it]

Episode 16040 - Step 3087658 - Epsilon 0.46212743315368116 - Mean Reward 762.42 - Mean Length 187.57 - Mean Loss 2.226 - Mean Q Value 42.387 - Time Delta 66.752 - Time 2023-11-03T22:43:15


 40%|████      | 16060/40000 [12:23:49<16:15:19,  2.44s/it]

Episode 16060 - Step 3091340 - Epsilon 0.4617022405231862 - Mean Reward 758.29 - Mean Length 188.11 - Mean Loss 2.22 - Mean Q Value 42.341 - Time Delta 53.292 - Time 2023-11-03T22:44:09


 40%|████      | 16080/40000 [12:24:42<17:41:56,  2.66s/it]

Episode 16080 - Step 3094969 - Epsilon 0.4612835510697987 - Mean Reward 762.95 - Mean Length 190.43 - Mean Loss 2.216 - Mean Q Value 42.303 - Time Delta 52.601 - Time 2023-11-03T22:45:01


 40%|████      | 16100/40000 [12:25:30<23:44:22,  3.58s/it]

Episode 16100 - Step 3098603 - Epsilon 0.46086466521894304 - Mean Reward 772.11 - Mean Length 199.27 - Mean Loss 2.211 - Mean Q Value 42.205 - Time Delta 52.656 - Time 2023-11-03T22:45:54


 40%|████      | 16101/40000 [12:25:36<27:48:12,  4.19s/it]

MarioNet saved to checkpoints2/mario_net_6.chkpt at step 3098603


 40%|████      | 16120/40000 [12:26:13<13:46:56,  2.08s/it]

Episode 16120 - Step 3101388 - Epsilon 0.46054389983505123 - Mean Reward 732.94 - Mean Length 184.16 - Mean Loss 2.211 - Mean Q Value 42.132 - Time Delta 39.975 - Time 2023-11-03T22:46:34


 40%|████      | 16140/40000 [12:27:08<16:32:36,  2.50s/it]

Episode 16140 - Step 3105243 - Epsilon 0.46010026440722773 - Mean Reward 750.36 - Mean Length 175.85 - Mean Loss 2.221 - Mean Q Value 42.08 - Time Delta 55.128 - Time 2023-11-03T22:47:29


 40%|████      | 16160/40000 [12:28:29<61:45:26,  9.33s/it]

Episode 16160 - Step 3110966 - Epsilon 0.45944244657025884 - Mean Reward 770.37 - Mean Length 196.26 - Mean Loss 2.228 - Mean Q Value 42.109 - Time Delta 81.292 - Time 2023-11-03T22:48:50


 40%|████      | 16180/40000 [12:29:20<18:55:29,  2.86s/it]

Episode 16180 - Step 3114380 - Epsilon 0.4590504796887195 - Mean Reward 795.24 - Mean Length 194.11 - Mean Loss 2.214 - Mean Q Value 42.102 - Time Delta 48.694 - Time 2023-11-03T22:49:39


 40%|████      | 16200/40000 [12:30:20<14:57:59,  2.26s/it]

Episode 16200 - Step 3118800 - Epsilon 0.45854350899811297 - Mean Reward 821.81 - Mean Length 201.97 - Mean Loss 2.212 - Mean Q Value 41.957 - Time Delta 62.987 - Time 2023-11-03T22:50:42


 41%|████      | 16201/40000 [12:30:24<16:47:29,  2.54s/it]

MarioNet saved to checkpoints2/mario_net_6.chkpt at step 3118800


 41%|████      | 16220/40000 [12:31:12<22:45:51,  3.45s/it]

Episode 16220 - Step 3122457 - Epsilon 0.4581244771217088 - Mean Reward 840.82 - Mean Length 210.69 - Mean Loss 2.211 - Mean Q Value 41.964 - Time Delta 52.593 - Time 2023-11-03T22:51:34


 41%|████      | 16240/40000 [12:31:56<10:51:27,  1.65s/it]

Episode 16240 - Step 3125949 - Epsilon 0.4577247089273606 - Mean Reward 800.31 - Mean Length 207.06 - Mean Loss 2.207 - Mean Q Value 41.987 - Time Delta 49.965 - Time 2023-11-03T22:52:24


 41%|████      | 16260/40000 [12:33:01<22:09:35,  3.36s/it]

Episode 16260 - Step 3130096 - Epsilon 0.45725040868390016 - Mean Reward 826.02 - Mean Length 191.3 - Mean Loss 2.213 - Mean Q Value 42.011 - Time Delta 59.415 - Time 2023-11-03T22:53:24


 41%|████      | 16280/40000 [12:33:57<17:01:36,  2.58s/it]

Episode 16280 - Step 3133974 - Epsilon 0.45680731917957657 - Mean Reward 836.4 - Mean Length 195.94 - Mean Loss 2.257 - Mean Q Value 42.066 - Time Delta 55.563 - Time 2023-11-03T22:54:19


 41%|████      | 16300/40000 [12:34:43<17:06:02,  2.60s/it]

Episode 16300 - Step 3137144 - Epsilon 0.45644544274631843 - Mean Reward 810.78 - Mean Length 183.44 - Mean Loss 2.286 - Mean Q Value 42.326 - Time Delta 45.347 - Time 2023-11-03T22:55:05


 41%|████      | 16301/40000 [12:34:47<18:42:12,  2.84s/it]

MarioNet saved to checkpoints2/mario_net_6.chkpt at step 3137144


 41%|████      | 16320/40000 [12:35:47<19:34:17,  2.98s/it]

Episode 16320 - Step 3141511 - Epsilon 0.4559473902957248 - Mean Reward 827.18 - Mean Length 190.54 - Mean Loss 2.278 - Mean Q Value 42.407 - Time Delta 62.65 - Time 2023-11-03T22:56:07


 41%|████      | 16340/40000 [12:36:36<17:06:27,  2.60s/it]

Episode 16340 - Step 3144993 - Epsilon 0.4555506607444145 - Mean Reward 830.89 - Mean Length 190.44 - Mean Loss 2.29 - Mean Q Value 42.409 - Time Delta 49.469 - Time 2023-11-03T22:56:57


 41%|████      | 16360/40000 [12:37:21<21:36:52,  3.29s/it]

Episode 16360 - Step 3148201 - Epsilon 0.4551854555356594 - Mean Reward 791.72 - Mean Length 181.05 - Mean Loss 2.309 - Mean Q Value 42.395 - Time Delta 46.181 - Time 2023-11-03T22:57:43


 41%|████      | 16380/40000 [12:38:08<10:22:55,  1.58s/it]

Episode 16380 - Step 3151381 - Epsilon 0.45482372685944333 - Mean Reward 769.33 - Mean Length 174.07 - Mean Loss 2.295 - Mean Q Value 42.333 - Time Delta 46.47 - Time 2023-11-03T22:58:30


 41%|████      | 16400/40000 [12:39:10<14:20:52,  2.19s/it]

Episode 16400 - Step 3155526 - Epsilon 0.45435265982729667 - Mean Reward 786.6 - Mean Length 183.82 - Mean Loss 2.274 - Mean Q Value 42.282 - Time Delta 59.309 - Time 2023-11-03T22:59:29


 41%|████      | 16401/40000 [12:39:11<11:57:54,  1.83s/it]

MarioNet saved to checkpoints2/mario_net_6.chkpt at step 3155526


 41%|████      | 16420/40000 [12:39:53<7:51:22,  1.20s/it]

Episode 16420 - Step 3158552 - Epsilon 0.4540090719756316 - Mean Reward 771.01 - Mean Length 170.41 - Mean Loss 2.254 - Mean Q Value 42.17 - Time Delta 43.003 - Time 2023-11-03T23:00:12


 41%|████      | 16440/40000 [12:40:40<15:26:31,  2.36s/it]

Episode 16440 - Step 3162168 - Epsilon 0.4535988331791039 - Mean Reward 786.28 - Mean Length 171.75 - Mean Loss 2.235 - Mean Q Value 42.117 - Time Delta 52.024 - Time 2023-11-03T23:01:04


 41%|████      | 16460/40000 [12:41:37<17:18:43,  2.65s/it]

Episode 16460 - Step 3165845 - Epsilon 0.45318205399103195 - Mean Reward 794.75 - Mean Length 176.44 - Mean Loss 2.195 - Mean Q Value 42.066 - Time Delta 52.41 - Time 2023-11-03T23:01:56


 41%|████      | 16480/40000 [12:42:21<17:16:30,  2.64s/it]

Episode 16480 - Step 3168934 - Epsilon 0.4528322192034032 - Mean Reward 788.09 - Mean Length 175.53 - Mean Loss 2.135 - Mean Q Value 42.14 - Time Delta 43.982 - Time 2023-11-03T23:02:40


 41%|████▏     | 16500/40000 [12:42:59<14:35:24,  2.24s/it]

Episode 16500 - Step 3171918 - Epsilon 0.4524945322984952 - Mean Reward 774.33 - Mean Length 163.92 - Mean Loss 2.109 - Mean Q Value 42.192 - Time Delta 42.252 - Time 2023-11-03T23:03:22


 41%|████▏     | 16501/40000 [12:43:04<20:19:23,  3.11s/it]

MarioNet saved to checkpoints2/mario_net_6.chkpt at step 3171918


 41%|████▏     | 16520/40000 [12:43:57<17:50:23,  2.74s/it]

Episode 16520 - Step 3175707 - Epsilon 0.4520661097428318 - Mean Reward 799.9 - Mean Length 171.55 - Mean Loss 2.099 - Mean Q Value 42.333 - Time Delta 53.602 - Time 2023-11-03T23:04:16


 41%|████▏     | 16540/40000 [12:44:54<16:00:02,  2.46s/it]

Episode 16540 - Step 3179805 - Epsilon 0.4516032051188553 - Mean Reward 788.62 - Mean Length 176.37 - Mean Loss 2.064 - Mean Q Value 42.422 - Time Delta 59.201 - Time 2023-11-03T23:05:15


 41%|████▏     | 16560/40000 [12:45:44<14:48:12,  2.27s/it]

Episode 16560 - Step 3183164 - Epsilon 0.4512241304661661 - Mean Reward 782.86 - Mean Length 173.19 - Mean Loss 2.105 - Mean Q Value 42.469 - Time Delta 48.668 - Time 2023-11-03T23:06:04


 41%|████▏     | 16580/40000 [12:46:25<19:23:30,  2.98s/it]

Episode 16580 - Step 3186178 - Epsilon 0.4508842611031908 - Mean Reward 772.34 - Mean Length 172.44 - Mean Loss 2.121 - Mean Q Value 42.588 - Time Delta 43.404 - Time 2023-11-03T23:06:47


 42%|████▏     | 16600/40000 [12:47:16<19:30:53,  3.00s/it]

Episode 16600 - Step 3189944 - Epsilon 0.45045995329250954 - Mean Reward 765.86 - Mean Length 180.26 - Mean Loss 2.119 - Mean Q Value 42.676 - Time Delta 54.053 - Time 2023-11-03T23:07:41


 42%|████▏     | 16601/40000 [12:47:23<27:16:26,  4.20s/it]

MarioNet saved to checkpoints2/mario_net_6.chkpt at step 3189944


 42%|████▏     | 16620/40000 [12:48:10<17:56:46,  2.76s/it]

Episode 16620 - Step 3193269 - Epsilon 0.4500856639950396 - Mean Reward 758.49 - Mean Length 175.62 - Mean Loss 2.147 - Mean Q Value 42.705 - Time Delta 48.209 - Time 2023-11-03T23:08:30


 42%|████▏     | 16640/40000 [12:49:09<23:26:51,  3.61s/it]

Episode 16640 - Step 3197350 - Epsilon 0.4496266982086479 - Mean Reward 771.22 - Mean Length 175.45 - Mean Loss 2.191 - Mean Q Value 42.885 - Time Delta 59.065 - Time 2023-11-03T23:09:29


 42%|████▏     | 16660/40000 [12:50:02<13:30:00,  2.08s/it]

Episode 16660 - Step 3201320 - Epsilon 0.44918066503543674 - Mean Reward 771.39 - Mean Length 181.56 - Mean Loss 2.172 - Mean Q Value 43.056 - Time Delta 56.245 - Time 2023-11-03T23:10:25


 42%|████▏     | 16680/40000 [12:50:51<16:13:09,  2.50s/it]

Episode 16680 - Step 3204659 - Epsilon 0.44880586788112176 - Mean Reward 782.57 - Mean Length 184.81 - Mean Loss 2.185 - Mean Q Value 43.009 - Time Delta 47.904 - Time 2023-11-03T23:11:13


 42%|████▏     | 16700/40000 [12:51:36<16:15:53,  2.51s/it]

Episode 16700 - Step 3207825 - Epsilon 0.44845077853740345 - Mean Reward 773.21 - Mean Length 178.81 - Mean Loss 2.217 - Mean Q Value 42.984 - Time Delta 44.533 - Time 2023-11-03T23:11:57


 42%|████▏     | 16701/40000 [12:51:39<17:04:21,  2.64s/it]

MarioNet saved to checkpoints2/mario_net_6.chkpt at step 3207825


 42%|████▏     | 16720/40000 [12:52:27<21:27:43,  3.32s/it]

Episode 16720 - Step 3211331 - Epsilon 0.4480578835922124 - Mean Reward 727.27 - Mean Length 180.62 - Mean Loss 2.22 - Mean Q Value 43.025 - Time Delta 49.75 - Time 2023-11-03T23:12:47


 42%|████▏     | 16740/40000 [12:53:00<11:22:12,  1.76s/it]

Episode 16740 - Step 3213762 - Epsilon 0.447785659109986 - Mean Reward 686.95 - Mean Length 164.12 - Mean Loss 2.229 - Mean Q Value 43.14 - Time Delta 35.059 - Time 2023-11-03T23:13:22


 42%|████▏     | 16760/40000 [12:54:02<16:39:17,  2.58s/it]

Episode 16760 - Step 3217890 - Epsilon 0.4473237826212454 - Mean Reward 670.38 - Mean Length 165.7 - Mean Loss 2.229 - Mean Q Value 43.261 - Time Delta 58.655 - Time 2023-11-03T23:14:21


 42%|████▏     | 16780/40000 [12:55:08<19:07:04,  2.96s/it]

Episode 16780 - Step 3222522 - Epsilon 0.4468060814230297 - Mean Reward 706.83 - Mean Length 178.63 - Mean Loss 2.253 - Mean Q Value 43.312 - Time Delta 66.664 - Time 2023-11-03T23:15:28


 42%|████▏     | 16800/40000 [12:55:56<8:27:14,  1.31s/it] 

Episode 16800 - Step 3225820 - Epsilon 0.4464378415905482 - Mean Reward 715.15 - Mean Length 179.95 - Mean Loss 2.266 - Mean Q Value 43.296 - Time Delta 47.356 - Time 2023-11-03T23:16:15


 42%|████▏     | 16801/40000 [12:55:57<7:48:07,  1.21s/it]

MarioNet saved to checkpoints2/mario_net_6.chkpt at step 3225820


 42%|████▏     | 16820/40000 [12:56:55<14:59:50,  2.33s/it]

Episode 16820 - Step 3230108 - Epsilon 0.4459595165925119 - Mean Reward 753.58 - Mean Length 187.77 - Mean Loss 2.256 - Mean Q Value 43.245 - Time Delta 61.308 - Time 2023-11-03T23:17:16


 42%|████▏     | 16840/40000 [12:57:51<10:56:56,  1.70s/it]

Episode 16840 - Step 3234001 - Epsilon 0.4455256975801199 - Mean Reward 778.74 - Mean Length 202.39 - Mean Loss 2.229 - Mean Q Value 42.987 - Time Delta 55.377 - Time 2023-11-03T23:18:12


 42%|████▏     | 16860/40000 [12:58:36<14:38:49,  2.28s/it]

Episode 16860 - Step 3237094 - Epsilon 0.4451813279504391 - Mean Reward 779.1 - Mean Length 192.04 - Mean Loss 2.2 - Mean Q Value 42.797 - Time Delta 44.829 - Time 2023-11-03T23:18:56


 42%|████▏     | 16880/40000 [12:59:27<14:57:54,  2.33s/it]

Episode 16880 - Step 3240573 - Epsilon 0.44479429977529744 - Mean Reward 742.34 - Mean Length 180.51 - Mean Loss 2.167 - Mean Q Value 42.752 - Time Delta 50.033 - Time 2023-11-03T23:19:46


 42%|████▏     | 16900/40000 [13:00:34<15:55:52,  2.48s/it]

Episode 16900 - Step 3245229 - Epsilon 0.44427686035377284 - Mean Reward 791.92 - Mean Length 194.09 - Mean Loss 2.152 - Mean Q Value 42.761 - Time Delta 66.819 - Time 2023-11-03T23:20:53


 42%|████▏     | 16901/40000 [13:00:35<13:04:34,  2.04s/it]

MarioNet saved to checkpoints2/mario_net_6.chkpt at step 3245229


 42%|████▏     | 16920/40000 [13:01:45<27:03:52,  4.22s/it]

Episode 16920 - Step 3250198 - Epsilon 0.44372530001382215 - Mean Reward 771.32 - Mean Length 200.9 - Mean Loss 2.124 - Mean Q Value 42.755 - Time Delta 72.223 - Time 2023-11-03T23:22:05


 42%|████▏     | 16940/40000 [13:02:58<30:38:57,  4.78s/it]

Episode 16940 - Step 3255239 - Epsilon 0.4431664473555426 - Mean Reward 793.95 - Mean Length 212.38 - Mean Loss 2.129 - Mean Q Value 42.654 - Time Delta 74.217 - Time 2023-11-03T23:23:20


 42%|████▏     | 16960/40000 [13:03:54<14:13:31,  2.22s/it]

Episode 16960 - Step 3259312 - Epsilon 0.44271542273096315 - Mean Reward 818.29 - Mean Length 222.18 - Mean Loss 2.138 - Mean Q Value 42.452 - Time Delta 59.171 - Time 2023-11-03T23:24:19


 42%|████▏     | 16980/40000 [13:05:00<22:17:08,  3.49s/it]

Episode 16980 - Step 3263503 - Epsilon 0.4422518105060479 - Mean Reward 820.64 - Mean Length 229.3 - Mean Loss 2.142 - Mean Q Value 42.195 - Time Delta 60.008 - Time 2023-11-03T23:25:19


 42%|████▎     | 17000/40000 [13:06:09<28:42:53,  4.49s/it]

Episode 17000 - Step 3268605 - Epsilon 0.4416880778481331 - Mean Reward 788.04 - Mean Length 233.76 - Mean Loss 2.136 - Mean Q Value 41.766 - Time Delta 73.289 - Time 2023-11-03T23:26:32


 43%|████▎     | 17001/40000 [13:06:14<28:37:25,  4.48s/it]

MarioNet saved to checkpoints2/mario_net_6.chkpt at step 3268605


 43%|████▎     | 17020/40000 [13:07:54<17:06:37,  2.68s/it]

Episode 17020 - Step 3275692 - Epsilon 0.4409062097403049 - Mean Reward 850.83 - Mean Length 254.94 - Mean Loss 2.136 - Mean Q Value 41.304 - Time Delta 101.206 - Time 2023-11-03T23:28:13


 43%|████▎     | 17040/40000 [13:08:36<12:44:29,  2.00s/it]

Episode 17040 - Step 3278712 - Epsilon 0.4405734511424875 - Mean Reward 798.17 - Mean Length 234.73 - Mean Loss 2.135 - Mean Q Value 40.909 - Time Delta 44.2 - Time 2023-11-03T23:28:58


 43%|████▎     | 17060/40000 [13:09:30<16:17:35,  2.56s/it]

Episode 17060 - Step 3282656 - Epsilon 0.4401392597568541 - Mean Reward 787.14 - Mean Length 233.44 - Mean Loss 2.132 - Mean Q Value 40.637 - Time Delta 56.895 - Time 2023-11-03T23:29:54


 43%|████▎     | 17080/40000 [13:10:18<16:13:37,  2.55s/it]

Episode 17080 - Step 3285678 - Epsilon 0.4398068600842988 - Mean Reward 793.07 - Mean Length 221.75 - Mean Loss 2.119 - Mean Q Value 40.335 - Time Delta 44.117 - Time 2023-11-03T23:30:39


 43%|████▎     | 17100/40000 [13:11:13<15:32:31,  2.44s/it]

Episode 17100 - Step 3289643 - Epsilon 0.4393711174803176 - Mean Reward 776.34 - Mean Length 210.38 - Mean Loss 2.106 - Mean Q Value 40.225 - Time Delta 57.024 - Time 2023-11-03T23:31:36


 43%|████▎     | 17101/40000 [13:11:17<18:25:17,  2.90s/it]

MarioNet saved to checkpoints2/mario_net_6.chkpt at step 3289643


 43%|████▎     | 17120/40000 [13:11:56<7:48:04,  1.23s/it]

Episode 17120 - Step 3292642 - Epsilon 0.4390418224036973 - Mean Reward 708.02 - Mean Length 169.5 - Mean Loss 2.078 - Mean Q Value 40.06 - Time Delta 43.406 - Time 2023-11-03T23:32:19


 43%|████▎     | 17140/40000 [13:12:42<12:20:51,  1.94s/it]

Episode 17140 - Step 3295655 - Epsilon 0.43871123863134637 - Mean Reward 713.64 - Mean Length 169.43 - Mean Loss 2.03 - Mean Q Value 39.919 - Time Delta 42.828 - Time 2023-11-03T23:33:02


 43%|████▎     | 17160/40000 [13:13:44<15:13:57,  2.40s/it]

Episode 17160 - Step 3300036 - Epsilon 0.43823100312411417 - Mean Reward 738.33 - Mean Length 173.8 - Mean Loss 1.982 - Mean Q Value 39.716 - Time Delta 62.719 - Time 2023-11-03T23:34:05


 43%|████▎     | 17180/40000 [13:14:41<28:36:44,  4.51s/it]

Episode 17180 - Step 3304281 - Epsilon 0.4377661771058123 - Mean Reward 744.69 - Mean Length 186.03 - Mean Loss 1.935 - Mean Q Value 39.593 - Time Delta 60.482 - Time 2023-11-03T23:35:05


 43%|████▎     | 17200/40000 [13:15:29<17:50:28,  2.82s/it]

Episode 17200 - Step 3307570 - Epsilon 0.43740637176690317 - Mean Reward 774.37 - Mean Length 179.27 - Mean Loss 1.879 - Mean Q Value 39.49 - Time Delta 47.053 - Time 2023-11-03T23:35:52


 43%|████▎     | 17201/40000 [13:15:34<20:53:36,  3.30s/it]

MarioNet saved to checkpoints2/mario_net_6.chkpt at step 3307570


 43%|████▎     | 17220/40000 [13:16:40<29:01:17,  4.59s/it]

Episode 17220 - Step 3312459 - Epsilon 0.43687207834873115 - Mean Reward 786.39 - Mean Length 198.17 - Mean Loss 1.856 - Mean Q Value 39.488 - Time Delta 69.009 - Time 2023-11-03T23:37:01


 43%|████▎     | 17240/40000 [13:17:30<24:04:26,  3.81s/it]

Episode 17240 - Step 3315866 - Epsilon 0.4365001309350443 - Mean Reward 788.48 - Mean Length 202.11 - Mean Loss 1.846 - Mean Q Value 39.398 - Time Delta 48.47 - Time 2023-11-03T23:37:50


 43%|████▎     | 17260/40000 [13:18:34<19:23:48,  3.07s/it]

Episode 17260 - Step 3320509 - Epsilon 0.4359937572881521 - Mean Reward 769.25 - Mean Length 204.73 - Mean Loss 1.817 - Mean Q Value 39.199 - Time Delta 66.142 - Time 2023-11-03T23:38:56


 43%|████▎     | 17280/40000 [13:19:39<13:15:29,  2.10s/it]

Episode 17280 - Step 3325359 - Epsilon 0.43546543515134856 - Mean Reward 771.74 - Mean Length 210.78 - Mean Loss 1.795 - Mean Q Value 39.082 - Time Delta 69.879 - Time 2023-11-03T23:40:06


 43%|████▎     | 17300/40000 [13:20:45<22:56:04,  3.64s/it]

Episode 17300 - Step 3329712 - Epsilon 0.4349917975970529 - Mean Reward 744.06 - Mean Length 221.42 - Mean Loss 1.778 - Mean Q Value 38.83 - Time Delta 62.598 - Time 2023-11-03T23:41:08


 43%|████▎     | 17301/40000 [13:20:50<24:56:49,  3.96s/it]

MarioNet saved to checkpoints2/mario_net_6.chkpt at step 3329712


 43%|████▎     | 17320/40000 [13:21:48<20:41:00,  3.28s/it]

Episode 17320 - Step 3333971 - Epsilon 0.4345288865086572 - Mean Reward 770.62 - Mean Length 215.12 - Mean Loss 1.788 - Mean Q Value 38.514 - Time Delta 61.308 - Time 2023-11-03T23:42:09


 43%|████▎     | 17340/40000 [13:22:32<12:12:28,  1.94s/it]

Episode 17340 - Step 3337002 - Epsilon 0.4341997469222253 - Mean Reward 787.45 - Mean Length 211.36 - Mean Loss 1.772 - Mean Q Value 38.357 - Time Delta 43.366 - Time 2023-11-03T23:42:53


 43%|████▎     | 17360/40000 [13:23:24<22:37:46,  3.60s/it]

Episode 17360 - Step 3340562 - Epsilon 0.43381348101295303 - Mean Reward 776.24 - Mean Length 200.53 - Mean Loss 1.768 - Mean Q Value 38.308 - Time Delta 50.153 - Time 2023-11-03T23:43:43


 43%|████▎     | 17380/40000 [13:24:17<19:48:57,  3.15s/it]

Episode 17380 - Step 3344278 - Epsilon 0.43341065538016144 - Mean Reward 772.73 - Mean Length 189.19 - Mean Loss 1.769 - Mean Q Value 38.195 - Time Delta 53.447 - Time 2023-11-03T23:44:36


 44%|████▎     | 17400/40000 [13:25:11<13:24:05,  2.13s/it]

Episode 17400 - Step 3348061 - Epsilon 0.43300095097107616 - Mean Reward 797.56 - Mean Length 183.49 - Mean Loss 1.757 - Mean Q Value 38.218 - Time Delta 53.745 - Time 2023-11-03T23:45:30


 44%|████▎     | 17401/40000 [13:25:12<11:32:17,  1.84s/it]

MarioNet saved to checkpoints2/mario_net_6.chkpt at step 3348061


 44%|████▎     | 17420/40000 [13:25:51<9:23:47,  1.50s/it]

Episode 17420 - Step 3351068 - Epsilon 0.4326755647853208 - Mean Reward 776.87 - Mean Length 170.97 - Mean Loss 1.725 - Mean Q Value 38.341 - Time Delta 43.557 - Time 2023-11-03T23:46:14


 44%|████▎     | 17440/40000 [13:26:42<13:51:12,  2.21s/it]

Episode 17440 - Step 3354659 - Epsilon 0.4322873045548364 - Mean Reward 791.44 - Mean Length 176.57 - Mean Loss 1.715 - Mean Q Value 38.605 - Time Delta 51.935 - Time 2023-11-03T23:47:06


 44%|████▎     | 17460/40000 [13:27:28<18:12:31,  2.91s/it]

Episode 17460 - Step 3357661 - Epsilon 0.43196299460471566 - Mean Reward 781.3 - Mean Length 170.99 - Mean Loss 1.713 - Mean Q Value 38.804 - Time Delta 42.878 - Time 2023-11-03T23:47:49


 44%|████▎     | 17480/40000 [13:28:21<14:44:58,  2.36s/it]

Episode 17480 - Step 3361346 - Epsilon 0.43156523189374196 - Mean Reward 784.28 - Mean Length 170.68 - Mean Loss 1.719 - Mean Q Value 39.065 - Time Delta 52.818 - Time 2023-11-03T23:48:41


 44%|████▍     | 17500/40000 [13:29:25<21:23:18,  3.42s/it]

Episode 17500 - Step 3365895 - Epsilon 0.43107471324642127 - Mean Reward 793.73 - Mean Length 178.34 - Mean Loss 1.745 - Mean Q Value 39.299 - Time Delta 65.071 - Time 2023-11-03T23:49:46


 44%|████▍     | 17501/40000 [13:29:28<20:32:58,  3.29s/it]

MarioNet saved to checkpoints2/mario_net_6.chkpt at step 3365895


 44%|████▍     | 17520/40000 [13:30:08<11:08:25,  1.78s/it]

Episode 17520 - Step 3368813 - Epsilon 0.4307603588784661 - Mean Reward 776.23 - Mean Length 177.45 - Mean Loss 1.77 - Mean Q Value 39.682 - Time Delta 42.255 - Time 2023-11-03T23:50:29


 44%|████▍     | 17540/40000 [13:30:37<5:31:01,  1.13it/s]

Episode 17540 - Step 3371012 - Epsilon 0.43052361342291673 - Mean Reward 716.45 - Mean Length 163.53 - Mean Loss 1.812 - Mean Q Value 39.897 - Time Delta 31.566 - Time 2023-11-03T23:51:00


 44%|████▍     | 17560/40000 [13:31:35<17:03:01,  2.74s/it]

Episode 17560 - Step 3374798 - Epsilon 0.4301163155559658 - Mean Reward 741.2 - Mean Length 171.37 - Mean Loss 1.83 - Mean Q Value 40.158 - Time Delta 53.796 - Time 2023-11-03T23:51:54


 44%|████▍     | 17580/40000 [13:32:30<13:52:48,  2.23s/it]

Episode 17580 - Step 3378685 - Epsilon 0.42969855298730897 - Mean Reward 753.18 - Mean Length 173.39 - Mean Loss 1.841 - Mean Q Value 40.603 - Time Delta 55.169 - Time 2023-11-03T23:52:49


 44%|████▍     | 17600/40000 [13:33:12<9:11:15,  1.48s/it]

Episode 17600 - Step 3381657 - Epsilon 0.4293794055004738 - Mean Reward 725.29 - Mean Length 157.62 - Mean Loss 1.885 - Mean Q Value 40.973 - Time Delta 42.687 - Time 2023-11-03T23:53:32


 44%|████▍     | 17601/40000 [13:33:14<10:00:48,  1.61s/it]

MarioNet saved to checkpoints2/mario_net_6.chkpt at step 3381657


 44%|████▍     | 17620/40000 [13:33:54<18:04:51,  2.91s/it]

Episode 17620 - Step 3384590 - Epsilon 0.429064678412926 - Mean Reward 731.73 - Mean Length 157.77 - Mean Loss 1.904 - Mean Q Value 41.312 - Time Delta 42.011 - Time 2023-11-03T23:54:14


 44%|████▍     | 17640/40000 [13:34:38<12:46:30,  2.06s/it]

Episode 17640 - Step 3387851 - Epsilon 0.4287150259365511 - Mean Reward 784.91 - Mean Length 168.39 - Mean Loss 1.89 - Mean Q Value 41.592 - Time Delta 46.043 - Time 2023-11-03T23:55:00


 44%|████▍     | 17660/40000 [13:35:30<13:24:37,  2.16s/it]

Episode 17660 - Step 3391764 - Epsilon 0.42829584047748936 - Mean Reward 786.28 - Mean Length 169.66 - Mean Loss 1.903 - Mean Q Value 41.927 - Time Delta 55.33 - Time 2023-11-03T23:55:55


 44%|████▍     | 17680/40000 [13:36:28<16:37:29,  2.68s/it]

Episode 17680 - Step 3395521 - Epsilon 0.42789375241916894 - Mean Reward 762.89 - Mean Length 168.36 - Mean Loss 1.9 - Mean Q Value 41.943 - Time Delta 53.096 - Time 2023-11-03T23:56:48


 44%|████▍     | 17700/40000 [13:37:18<10:30:45,  1.70s/it]

Episode 17700 - Step 3399129 - Epsilon 0.4275079662219418 - Mean Reward 794.14 - Mean Length 174.72 - Mean Loss 1.866 - Mean Q Value 42.084 - Time Delta 51.174 - Time 2023-11-03T23:57:40


 44%|████▍     | 17701/40000 [13:37:21<12:45:51,  2.06s/it]

MarioNet saved to checkpoints2/mario_net_6.chkpt at step 3399129


 44%|████▍     | 17720/40000 [13:38:06<14:21:53,  2.32s/it]

Episode 17720 - Step 3402442 - Epsilon 0.4271540292989871 - Mean Reward 797.47 - Mean Length 178.52 - Mean Loss 1.873 - Mean Q Value 41.995 - Time Delta 47.217 - Time 2023-11-03T23:58:27


 44%|████▍     | 17740/40000 [13:38:52<18:00:31,  2.91s/it]

Episode 17740 - Step 3405650 - Epsilon 0.42681158906152633 - Mean Reward 793.69 - Mean Length 177.99 - Mean Loss 1.879 - Mean Q Value 42.078 - Time Delta 45.244 - Time 2023-11-03T23:59:12


 44%|████▍     | 17760/40000 [13:39:40<14:18:06,  2.32s/it]

Episode 17760 - Step 3409275 - Epsilon 0.42642496622549125 - Mean Reward 773.28 - Mean Length 175.11 - Mean Loss 1.888 - Mean Q Value 42.082 - Time Delta 51.021 - Time 2023-11-04T00:00:03


 44%|████▍     | 17780/40000 [13:40:22<12:12:45,  1.98s/it]

Episode 17780 - Step 3412075 - Epsilon 0.426126573161551 - Mean Reward 734.88 - Mean Length 165.54 - Mean Loss 1.903 - Mean Q Value 42.163 - Time Delta 39.781 - Time 2023-11-04T00:00:43


 44%|████▍     | 17800/40000 [13:41:06<9:05:01,  1.47s/it] 

Episode 17800 - Step 3415125 - Epsilon 0.425801775453733 - Mean Reward 693.14 - Mean Length 159.96 - Mean Loss 1.921 - Mean Q Value 42.294 - Time Delta 43.641 - Time 2023-11-04T00:01:26


 45%|████▍     | 17801/40000 [13:41:08<11:16:44,  1.83s/it]

MarioNet saved to checkpoints2/mario_net_6.chkpt at step 3415125


 45%|████▍     | 17820/40000 [13:41:50<15:54:29,  2.58s/it]

Episode 17820 - Step 3418113 - Epsilon 0.42548382025873016 - Mean Reward 682.71 - Mean Length 156.71 - Mean Loss 1.894 - Mean Q Value 42.596 - Time Delta 42.821 - Time 2023-11-04T00:02:09


 45%|████▍     | 17840/40000 [13:42:33<16:10:45,  2.63s/it]

Episode 17840 - Step 3421391 - Epsilon 0.42513527905867593 - Mean Reward 678.01 - Mean Length 157.41 - Mean Loss 1.916 - Mean Q Value 42.859 - Time Delta 46.791 - Time 2023-11-04T00:02:56


 45%|████▍     | 17860/40000 [13:43:17<10:19:30,  1.68s/it]

Episode 17860 - Step 3424459 - Epsilon 0.4248093252781031 - Mean Reward 678.04 - Mean Length 151.84 - Mean Loss 1.934 - Mean Q Value 43.213 - Time Delta 43.752 - Time 2023-11-04T00:03:40


 45%|████▍     | 17880/40000 [13:44:24<21:42:59,  3.53s/it]

Episode 17880 - Step 3429151 - Epsilon 0.4243113160167634 - Mean Reward 771.61 - Mean Length 170.76 - Mean Loss 1.941 - Mean Q Value 43.696 - Time Delta 67.432 - Time 2023-11-04T00:04:47


 45%|████▍     | 17900/40000 [13:45:24<18:04:06,  2.94s/it]

Episode 17900 - Step 3432955 - Epsilon 0.4239079877177633 - Mean Reward 783.85 - Mean Length 178.3 - Mean Loss 1.959 - Mean Q Value 44.051 - Time Delta 55.621 - Time 2023-11-04T00:05:43


 45%|████▍     | 17901/40000 [13:45:25<14:30:09,  2.36s/it]

MarioNet saved to checkpoints2/mario_net_6.chkpt at step 3432955


 45%|████▍     | 17920/40000 [13:46:13<15:55:41,  2.60s/it]

Episode 17920 - Step 3436533 - Epsilon 0.4235289715155875 - Mean Reward 814.65 - Mean Length 184.2 - Mean Loss 1.979 - Mean Q Value 44.289 - Time Delta 52.413 - Time 2023-11-04T00:06:35


 45%|████▍     | 17940/40000 [13:47:04<19:34:30,  3.19s/it]

Episode 17940 - Step 3440030 - Epsilon 0.4231588630731509 - Mean Reward 828.57 - Mean Length 186.39 - Mean Loss 1.978 - Mean Q Value 44.475 - Time Delta 50.719 - Time 2023-11-04T00:07:26


 45%|████▍     | 17960/40000 [13:48:11<19:34:51,  3.20s/it]

Episode 17960 - Step 3444633 - Epsilon 0.42267219302205467 - Mean Reward 819.82 - Mean Length 201.74 - Mean Loss 1.996 - Mean Q Value 44.676 - Time Delta 65.986 - Time 2023-11-04T00:08:32


 45%|████▍     | 17980/40000 [13:48:56<9:07:19,  1.49s/it] 

Episode 17980 - Step 3447785 - Epsilon 0.4223392584857243 - Mean Reward 763.68 - Mean Length 186.34 - Mean Loss 2.012 - Mean Q Value 44.745 - Time Delta 45.951 - Time 2023-11-04T00:09:18


 45%|████▌     | 18000/40000 [13:50:17<17:44:13,  2.90s/it]

Episode 18000 - Step 3453481 - Epsilon 0.4217382753080136 - Mean Reward 806.38 - Mean Length 205.26 - Mean Loss 2.018 - Mean Q Value 44.8 - Time Delta 82.032 - Time 2023-11-04T00:10:40


 45%|████▌     | 18001/40000 [13:50:22<21:37:31,  3.54s/it]

MarioNet saved to checkpoints2/mario_net_6.chkpt at step 3453481


 45%|████▌     | 18020/40000 [13:51:09<8:20:26,  1.37s/it] 

Episode 18020 - Step 3457005 - Epsilon 0.4213668874613585 - Mean Reward 787.34 - Mean Length 204.72 - Mean Loss 2.031 - Mean Q Value 44.843 - Time Delta 51.028 - Time 2023-11-04T00:11:31


 45%|████▌     | 18040/40000 [13:51:59<17:05:04,  2.80s/it]

Episode 18040 - Step 3460305 - Epsilon 0.4210194030927299 - Mean Reward 774.72 - Mean Length 202.75 - Mean Loss 2.047 - Mean Q Value 44.801 - Time Delta 48.166 - Time 2023-11-04T00:12:19


 45%|████▌     | 18060/40000 [13:53:03<10:27:15,  1.72s/it]

Episode 18060 - Step 3465016 - Epsilon 0.42052383931112813 - Mean Reward 777.04 - Mean Length 203.83 - Mean Loss 2.071 - Mean Q Value 44.577 - Time Delta 66.762 - Time 2023-11-04T00:13:26


 45%|████▌     | 18080/40000 [13:54:18<37:51:48,  6.22s/it]

Episode 18080 - Step 3470166 - Epsilon 0.4199827631927963 - Mean Reward 792.01 - Mean Length 223.81 - Mean Loss 2.082 - Mean Q Value 44.356 - Time Delta 73.54 - Time 2023-11-04T00:14:39


 45%|████▌     | 18100/40000 [13:55:03<11:29:52,  1.89s/it]

Episode 18100 - Step 3474136 - Epsilon 0.4195661370331218 - Mean Reward 754.03 - Mean Length 206.55 - Mean Loss 2.117 - Mean Q Value 44.031 - Time Delta 56.713 - Time 2023-11-04T00:15:36


 45%|████▌     | 18101/40000 [13:55:18<36:05:16,  5.93s/it]

MarioNet saved to checkpoints2/mario_net_6.chkpt at step 3474136


 45%|████▌     | 18120/40000 [13:55:56<7:21:27,  1.21s/it]

Episode 18120 - Step 3476996 - Epsilon 0.41926625442842247 - Mean Reward 748.02 - Mean Length 199.91 - Mean Loss 2.148 - Mean Q Value 43.559 - Time Delta 40.82 - Time 2023-11-04T00:16:17


 45%|████▌     | 18140/40000 [13:56:51<17:15:50,  2.84s/it]

Episode 18140 - Step 3480899 - Epsilon 0.41885735485381753 - Mean Reward 788.93 - Mean Length 205.94 - Mean Loss 2.145 - Mean Q Value 43.259 - Time Delta 55.819 - Time 2023-11-04T00:17:13


 45%|████▌     | 18160/40000 [13:57:50<24:29:23,  4.04s/it]

Episode 18160 - Step 3484978 - Epsilon 0.41843044272159996 - Mean Reward 800.86 - Mean Length 199.62 - Mean Loss 2.099 - Mean Q Value 42.99 - Time Delta 58.248 - Time 2023-11-04T00:18:11


 45%|████▌     | 18180/40000 [13:59:04<19:00:15,  3.14s/it]

Episode 18180 - Step 3490386 - Epsilon 0.4178651069453957 - Mean Reward 815.86 - Mean Length 202.2 - Mean Loss 2.075 - Mean Q Value 42.654 - Time Delta 76.711 - Time 2023-11-04T00:19:28


 46%|████▌     | 18200/40000 [13:59:52<6:41:58,  1.11s/it]

Episode 18200 - Step 3493551 - Epsilon 0.4175346019114632 - Mean Reward 790.44 - Mean Length 194.15 - Mean Loss 2.022 - Mean Q Value 42.437 - Time Delta 45.35 - Time 2023-11-04T00:20:13


 46%|████▌     | 18201/40000 [13:59:55<10:23:47,  1.72s/it]

MarioNet saved to checkpoints2/mario_net_6.chkpt at step 3493551


 46%|████▌     | 18220/40000 [14:00:29<13:05:52,  2.16s/it]

Episode 18220 - Step 3496006 - Epsilon 0.417278418641764 - Mean Reward 776.77 - Mean Length 190.1 - Mean Loss 1.978 - Mean Q Value 42.38 - Time Delta 35.417 - Time 2023-11-04T00:20:49


 46%|████▌     | 18240/40000 [14:01:17<13:25:06,  2.22s/it]

Episode 18240 - Step 3499483 - Epsilon 0.4169158569322009 - Mean Reward 755.93 - Mean Length 185.84 - Mean Loss 1.955 - Mean Q Value 42.209 - Time Delta 49.596 - Time 2023-11-04T00:21:38


 46%|████▌     | 18243/40000 [14:01:24<13:54:21,  2.30s/it]

MarioNet saved to checkpoints2/mario_net_7.chkpt at step 3500000


 46%|████▌     | 18260/40000 [14:02:00<14:18:34,  2.37s/it]

Episode 18260 - Step 3502534 - Epsilon 0.41659797556996264 - Mean Reward 757.05 - Mean Length 175.56 - Mean Loss 1.973 - Mean Q Value 42.13 - Time Delta 43.479 - Time 2023-11-04T00:22:22


 46%|████▌     | 18280/40000 [14:03:05<24:41:34,  4.09s/it]

Episode 18280 - Step 3506927 - Epsilon 0.41614069793461433 - Mean Reward 754.73 - Mean Length 165.41 - Mean Loss 1.982 - Mean Q Value 42.18 - Time Delta 63.107 - Time 2023-11-04T00:23:25


 46%|████▌     | 18300/40000 [14:04:01<16:04:21,  2.67s/it]

Episode 18300 - Step 3510943 - Epsilon 0.41572310228972315 - Mean Reward 804.53 - Mean Length 173.92 - Mean Loss 1.999 - Mean Q Value 42.193 - Time Delta 57.153 - Time 2023-11-04T00:24:22


 46%|████▌     | 18301/40000 [14:04:04<16:40:50,  2.77s/it]

MarioNet saved to checkpoints2/mario_net_7.chkpt at step 3510943


 46%|████▌     | 18320/40000 [14:04:57<15:53:29,  2.64s/it]

Episode 18320 - Step 3514860 - Epsilon 0.4153162046506599 - Mean Reward 805.43 - Mean Length 188.54 - Mean Loss 2.033 - Mean Q Value 42.248 - Time Delta 55.076 - Time 2023-11-04T00:25:17


 46%|████▌     | 18340/40000 [14:06:00<21:52:45,  3.64s/it]

Episode 18340 - Step 3519317 - Epsilon 0.4148536962346026 - Mean Reward 806.45 - Mean Length 198.34 - Mean Loss 2.037 - Mean Q Value 42.22 - Time Delta 63.058 - Time 2023-11-04T00:26:20


 46%|████▌     | 18360/40000 [14:06:43<10:10:03,  1.69s/it]

Episode 18360 - Step 3522576 - Epsilon 0.4145158317995298 - Mean Reward 799.7 - Mean Length 200.42 - Mean Loss 2.038 - Mean Q Value 42.164 - Time Delta 45.97 - Time 2023-11-04T00:27:06


 46%|████▌     | 18380/40000 [14:07:42<20:53:54,  3.48s/it]

Episode 18380 - Step 3526612 - Epsilon 0.4140977962073536 - Mean Reward 806.29 - Mean Length 196.85 - Mean Loss 2.046 - Mean Q Value 41.978 - Time Delta 56.775 - Time 2023-11-04T00:28:03


 46%|████▌     | 18400/40000 [14:08:37<16:02:21,  2.67s/it]

Episode 18400 - Step 3530717 - Epsilon 0.41367304627792645 - Mean Reward 807.07 - Mean Length 197.74 - Mean Loss 2.039 - Mean Q Value 41.79 - Time Delta 58.503 - Time 2023-11-04T00:29:01


 46%|████▌     | 18401/40000 [14:08:43<22:09:28,  3.69s/it]

MarioNet saved to checkpoints2/mario_net_7.chkpt at step 3530717


 46%|████▌     | 18420/40000 [14:09:41<25:41:58,  4.29s/it]

Episode 18420 - Step 3535049 - Epsilon 0.4132252808215334 - Mean Reward 820.0 - Mean Length 201.89 - Mean Loss 2.016 - Mean Q Value 41.582 - Time Delta 60.617 - Time 2023-11-04T00:30:02


 46%|████▌     | 18440/40000 [14:10:20<9:42:34,  1.62s/it]

Episode 18440 - Step 3537722 - Epsilon 0.4129492352370821 - Mean Reward 786.04 - Mean Length 184.05 - Mean Loss 1.98 - Mean Q Value 41.406 - Time Delta 37.637 - Time 2023-11-04T00:30:40


 46%|████▌     | 18460/40000 [14:11:25<37:36:44,  6.29s/it]

Episode 18460 - Step 3542408 - Epsilon 0.41246574840520656 - Mean Reward 809.65 - Mean Length 198.32 - Mean Loss 1.949 - Mean Q Value 41.222 - Time Delta 65.85 - Time 2023-11-04T00:31:45


 46%|████▌     | 18480/40000 [14:12:17<19:38:46,  3.29s/it]

Episode 18480 - Step 3546943 - Epsilon 0.41199838029382774 - Mean Reward 791.6 - Mean Length 203.31 - Mean Loss 1.921 - Mean Q Value 41.112 - Time Delta 63.119 - Time 2023-11-04T00:32:48


 46%|████▋     | 18500/40000 [14:13:16<21:48:44,  3.65s/it]

Episode 18500 - Step 3550644 - Epsilon 0.4116173550437678 - Mean Reward 779.49 - Mean Length 199.27 - Mean Loss 1.887 - Mean Q Value 41.138 - Time Delta 52.022 - Time 2023-11-04T00:33:41


 46%|████▋     | 18501/40000 [14:13:22<25:52:08,  4.33s/it]

MarioNet saved to checkpoints2/mario_net_7.chkpt at step 3550644


 46%|████▋     | 18520/40000 [14:14:12<12:23:43,  2.08s/it]

Episode 18520 - Step 3554559 - Epsilon 0.4112146815975918 - Mean Reward 815.93 - Mean Length 195.1 - Mean Loss 1.891 - Mean Q Value 41.139 - Time Delta 55.155 - Time 2023-11-04T00:34:36


 46%|████▋     | 18540/40000 [14:15:04<13:38:50,  2.29s/it]

Episode 18540 - Step 3558059 - Epsilon 0.4108550260784233 - Mean Reward 838.41 - Mean Length 203.37 - Mean Loss 1.926 - Mean Q Value 41.168 - Time Delta 50.034 - Time 2023-11-04T00:35:26


 46%|████▋     | 18560/40000 [14:16:10<31:39:59,  5.32s/it]

Episode 18560 - Step 3562710 - Epsilon 0.4103775819649659 - Mean Reward 827.83 - Mean Length 203.02 - Mean Loss 1.937 - Mean Q Value 41.245 - Time Delta 66.465 - Time 2023-11-04T00:36:32


 46%|████▋     | 18580/40000 [14:17:09<28:41:50,  4.82s/it]

Episode 18580 - Step 3566789 - Epsilon 0.4099593126747163 - Mean Reward 812.18 - Mean Length 198.46 - Mean Loss 1.949 - Mean Q Value 41.328 - Time Delta 57.138 - Time 2023-11-04T00:37:29


 46%|████▋     | 18600/40000 [14:18:09<15:28:12,  2.60s/it]

Episode 18600 - Step 3570955 - Epsilon 0.409532562266014 - Mean Reward 836.93 - Mean Length 203.11 - Mean Loss 1.97 - Mean Q Value 41.472 - Time Delta 59.099 - Time 2023-11-04T00:38:28


 47%|████▋     | 18601/40000 [14:18:10<12:37:03,  2.12s/it]

MarioNet saved to checkpoints2/mario_net_7.chkpt at step 3570955


 47%|████▋     | 18620/40000 [14:19:06<22:46:36,  3.84s/it]

Episode 18620 - Step 3574958 - Epsilon 0.40912292750824325 - Mean Reward 838.94 - Mean Length 203.99 - Mean Loss 1.974 - Mean Q Value 41.701 - Time Delta 56.527 - Time 2023-11-04T00:39:25


 47%|████▋     | 18640/40000 [14:19:56<19:29:24,  3.28s/it]

Episode 18640 - Step 3578607 - Epsilon 0.40874987525526957 - Mean Reward 839.63 - Mean Length 205.48 - Mean Loss 1.959 - Mean Q Value 41.948 - Time Delta 51.66 - Time 2023-11-04T00:40:17


 47%|████▋     | 18660/40000 [14:20:40<14:09:11,  2.39s/it]

Episode 18660 - Step 3581786 - Epsilon 0.40842515030594173 - Mean Reward 827.99 - Mean Length 190.76 - Mean Loss 1.979 - Mean Q Value 42.154 - Time Delta 45.09 - Time 2023-11-04T00:41:02


 47%|████▋     | 18680/40000 [14:21:27<14:00:01,  2.36s/it]

Episode 18680 - Step 3585132 - Epsilon 0.4080836454792821 - Mean Reward 835.38 - Mean Length 183.43 - Mean Loss 1.983 - Mean Q Value 42.332 - Time Delta 47.666 - Time 2023-11-04T00:41:49


 47%|████▋     | 18700/40000 [14:22:34<17:49:39,  3.01s/it]

Episode 18700 - Step 3589731 - Epsilon 0.40761472087390144 - Mean Reward 804.58 - Mean Length 187.76 - Mean Loss 1.973 - Mean Q Value 42.441 - Time Delta 65.77 - Time 2023-11-04T00:42:55


 47%|████▋     | 18701/40000 [14:22:37<17:26:17,  2.95s/it]

MarioNet saved to checkpoints2/mario_net_7.chkpt at step 3589731


 47%|████▋     | 18720/40000 [14:23:24<13:28:20,  2.28s/it]

Episode 18720 - Step 3593341 - Epsilon 0.40724701449476003 - Mean Reward 774.47 - Mean Length 183.83 - Mean Loss 1.963 - Mean Q Value 42.499 - Time Delta 51.468 - Time 2023-11-04T00:43:47


 47%|████▋     | 18740/40000 [14:24:20<12:50:28,  2.17s/it]

Episode 18740 - Step 3597230 - Epsilon 0.4068512609522584 - Mean Reward 778.95 - Mean Length 186.23 - Mean Loss 1.978 - Mean Q Value 42.566 - Time Delta 55.013 - Time 2023-11-04T00:44:42


 47%|████▋     | 18760/40000 [14:25:11<10:39:52,  1.81s/it]

Episode 18760 - Step 3600650 - Epsilon 0.4065035517474975 - Mean Reward 783.41 - Mean Length 188.64 - Mean Loss 1.974 - Mean Q Value 42.565 - Time Delta 48.629 - Time 2023-11-04T00:45:30


 47%|████▋     | 18780/40000 [14:26:10<18:02:02,  3.06s/it]

Episode 18780 - Step 3604788 - Epsilon 0.4060832412137246 - Mean Reward 795.08 - Mean Length 196.56 - Mean Loss 1.977 - Mean Q Value 42.591 - Time Delta 58.599 - Time 2023-11-04T00:46:29


 47%|████▋     | 18800/40000 [14:27:09<16:29:21,  2.80s/it]

Episode 18800 - Step 3609028 - Epsilon 0.4056530209812052 - Mean Reward 816.17 - Mean Length 192.97 - Mean Loss 1.98 - Mean Q Value 42.489 - Time Delta 60.776 - Time 2023-11-04T00:47:30


 47%|████▋     | 18801/40000 [14:27:11<16:41:16,  2.83s/it]

MarioNet saved to checkpoints2/mario_net_7.chkpt at step 3609028


 47%|████▋     | 18820/40000 [14:27:56<19:09:24,  3.26s/it]

Episode 18820 - Step 3612482 - Epsilon 0.4053028907442587 - Mean Reward 838.61 - Mean Length 191.41 - Mean Loss 1.996 - Mean Q Value 42.372 - Time Delta 49.088 - Time 2023-11-04T00:48:19


 47%|████▋     | 18840/40000 [14:29:06<15:37:31,  2.66s/it]

Episode 18840 - Step 3617363 - Epsilon 0.404808621457356 - Mean Reward 854.08 - Mean Length 201.33 - Mean Loss 1.996 - Mean Q Value 42.357 - Time Delta 69.538 - Time 2023-11-04T00:49:28


 47%|████▋     | 18860/40000 [14:29:50<12:35:47,  2.15s/it]

Episode 18860 - Step 3620443 - Epsilon 0.4044970387545785 - Mean Reward 846.16 - Mean Length 197.93 - Mean Loss 1.978 - Mean Q Value 42.421 - Time Delta 43.749 - Time 2023-11-04T00:50:12


 47%|████▋     | 18880/40000 [14:30:41<10:36:05,  1.81s/it]

Episode 18880 - Step 3623873 - Epsilon 0.4041503311725804 - Mean Reward 835.58 - Mean Length 190.85 - Mean Loss 1.989 - Mean Q Value 42.507 - Time Delta 48.539 - Time 2023-11-04T00:51:01


 47%|████▋     | 18900/40000 [14:31:29<13:41:33,  2.34s/it]

Episode 18900 - Step 3627299 - Epsilon 0.40380432456911214 - Mean Reward 814.85 - Mean Length 182.71 - Mean Loss 2.016 - Mean Q Value 42.572 - Time Delta 48.587 - Time 2023-11-04T00:51:49


 47%|████▋     | 18901/40000 [14:31:31<13:08:12,  2.24s/it]

MarioNet saved to checkpoints2/mario_net_7.chkpt at step 3627299


 47%|████▋     | 18920/40000 [14:32:31<18:51:46,  3.22s/it]

Episode 18920 - Step 3631789 - Epsilon 0.4033513084609498 - Mean Reward 863.23 - Mean Length 193.07 - Mean Loss 2.005 - Mean Q Value 42.765 - Time Delta 63.966 - Time 2023-11-04T00:52:53


 47%|████▋     | 18940/40000 [14:33:25<17:20:58,  2.97s/it]

Episode 18940 - Step 3635611 - Epsilon 0.402966090304789 - Mean Reward 870.92 - Mean Length 182.48 - Mean Loss 2.015 - Mean Q Value 42.93 - Time Delta 54.458 - Time 2023-11-04T00:53:48


 47%|████▋     | 18960/40000 [14:34:17<16:23:03,  2.80s/it]

Episode 18960 - Step 3639325 - Epsilon 0.40259210989041716 - Mean Reward 896.17 - Mean Length 188.82 - Mean Loss 2.021 - Mean Q Value 43.16 - Time Delta 52.099 - Time 2023-11-04T00:54:40


 47%|████▋     | 18980/40000 [14:35:00<11:37:52,  1.99s/it]

Episode 18980 - Step 3642284 - Epsilon 0.4022944024679805 - Mean Reward 883.77 - Mean Length 184.11 - Mean Loss 2.034 - Mean Q Value 43.372 - Time Delta 42.026 - Time 2023-11-04T00:55:22


 48%|████▊     | 19000/40000 [14:35:50<24:12:14,  4.15s/it]

Episode 19000 - Step 3645817 - Epsilon 0.40193923276746363 - Mean Reward 870.81 - Mean Length 185.18 - Mean Loss 2.041 - Mean Q Value 43.684 - Time Delta 50.045 - Time 2023-11-04T00:56:12


 48%|████▊     | 19001/40000 [14:35:53<22:27:13,  3.85s/it]

MarioNet saved to checkpoints2/mario_net_7.chkpt at step 3645817


 48%|████▊     | 19020/40000 [14:36:38<11:26:29,  1.96s/it]

Episode 19020 - Step 3648989 - Epsilon 0.40162062126216713 - Mean Reward 795.83 - Mean Length 172.0 - Mean Loss 2.05 - Mean Q Value 43.893 - Time Delta 45.227 - Time 2023-11-04T00:56:57


 48%|████▊     | 19040/40000 [14:37:18<12:50:45,  2.21s/it]

Episode 19040 - Step 3652006 - Epsilon 0.40131781308157555 - Mean Reward 746.32 - Mean Length 163.95 - Mean Loss 2.077 - Mean Q Value 44.098 - Time Delta 42.783 - Time 2023-11-04T00:57:40


 48%|████▊     | 19060/40000 [14:38:03<11:15:22,  1.94s/it]

Episode 19060 - Step 3655136 - Epsilon 0.4010039046862328 - Mean Reward 726.97 - Mean Length 158.11 - Mean Loss 2.108 - Mean Q Value 44.301 - Time Delta 44.446 - Time 2023-11-04T00:58:24


 48%|████▊     | 19080/40000 [14:38:50<15:11:01,  2.61s/it]

Episode 19080 - Step 3658493 - Epsilon 0.40066750329930445 - Mean Reward 744.23 - Mean Length 162.09 - Mean Loss 2.104 - Mean Q Value 44.456 - Time Delta 48.224 - Time 2023-11-04T00:59:12


 48%|████▊     | 19100/40000 [14:39:35<11:36:25,  2.00s/it]

Episode 19100 - Step 3661778 - Epsilon 0.40033859014950623 - Mean Reward 736.25 - Mean Length 159.61 - Mean Loss 2.111 - Mean Q Value 44.572 - Time Delta 46.604 - Time 2023-11-04T00:59:59


 48%|████▊     | 19101/40000 [14:39:41<18:07:55,  3.12s/it]

MarioNet saved to checkpoints2/mario_net_7.chkpt at step 3661778


 48%|████▊     | 19120/40000 [14:40:20<14:49:52,  2.56s/it]

Episode 19120 - Step 3664604 - Epsilon 0.4000558507896239 - Mean Reward 737.04 - Mean Length 156.15 - Mean Loss 2.105 - Mean Q Value 44.837 - Time Delta 40.17 - Time 2023-11-04T01:00:39


 48%|████▊     | 19140/40000 [14:41:26<12:58:08,  2.24s/it]

Episode 19140 - Step 3669451 - Epsilon 0.3995713766418569 - Mean Reward 758.76 - Mean Length 174.45 - Mean Loss 2.079 - Mean Q Value 44.968 - Time Delta 69.012 - Time 2023-11-04T01:01:48


 48%|████▊     | 19160/40000 [14:42:07<8:53:14,  1.54s/it] 

Episode 19160 - Step 3672429 - Epsilon 0.39927400642453253 - Mean Reward 741.45 - Mean Length 172.93 - Mean Loss 2.062 - Mean Q Value 45.114 - Time Delta 42.513 - Time 2023-11-04T01:02:31


 48%|████▊     | 19180/40000 [14:42:59<17:55:23,  3.10s/it]

Episode 19180 - Step 3675868 - Epsilon 0.3989308780780038 - Mean Reward 735.08 - Mean Length 173.75 - Mean Loss 2.043 - Mean Q Value 45.188 - Time Delta 48.704 - Time 2023-11-04T01:03:19


 48%|████▊     | 19200/40000 [14:43:51<20:14:42,  3.50s/it]

Episode 19200 - Step 3680092 - Epsilon 0.3985098293709092 - Mean Reward 764.08 - Mean Length 183.14 - Mean Loss 2.008 - Mean Q Value 45.365 - Time Delta 60.124 - Time 2023-11-04T01:04:20


 48%|████▊     | 19201/40000 [14:44:01<31:54:14,  5.52s/it]

MarioNet saved to checkpoints2/mario_net_7.chkpt at step 3680092


 48%|████▊     | 19220/40000 [14:44:40<10:36:08,  1.84s/it]

Episode 19220 - Step 3682970 - Epsilon 0.39822320463842176 - Mean Reward 749.73 - Mean Length 183.66 - Mean Loss 2.009 - Mean Q Value 45.371 - Time Delta 40.72 - Time 2023-11-04T01:05:00


 48%|████▊     | 19240/40000 [14:45:47<14:10:34,  2.46s/it]

Episode 19240 - Step 3687773 - Epsilon 0.39774532503003385 - Mean Reward 738.19 - Mean Length 183.22 - Mean Loss 2.005 - Mean Q Value 45.306 - Time Delta 68.1 - Time 2023-11-04T01:06:08


 48%|████▊     | 19260/40000 [14:46:45<16:13:29,  2.82s/it]

Episode 19260 - Step 3691726 - Epsilon 0.3973524473260783 - Mean Reward 766.92 - Mean Length 192.97 - Mean Loss 1.999 - Mean Q Value 45.23 - Time Delta 56.234 - Time 2023-11-04T01:07:05


 48%|████▊     | 19280/40000 [14:47:27<9:19:46,  1.62s/it]

Episode 19280 - Step 3694692 - Epsilon 0.3970579196591562 - Mean Reward 751.42 - Mean Length 188.24 - Mean Loss 1.99 - Mean Q Value 45.192 - Time Delta 42.669 - Time 2023-11-04T01:07:47


 48%|████▊     | 19300/40000 [14:48:26<17:37:12,  3.06s/it]

Episode 19300 - Step 3698879 - Epsilon 0.3966425166792349 - Mean Reward 777.6 - Mean Length 187.87 - Mean Loss 1.981 - Mean Q Value 45.178 - Time Delta 59.649 - Time 2023-11-04T01:08:47


 48%|████▊     | 19301/40000 [14:48:29<16:47:55,  2.92s/it]

MarioNet saved to checkpoints2/mario_net_7.chkpt at step 3698879


 48%|████▊     | 19320/40000 [14:49:13<20:50:34,  3.63s/it]

Episode 19320 - Step 3702108 - Epsilon 0.39632245616939626 - Mean Reward 788.7 - Mean Length 191.38 - Mean Loss 1.994 - Mean Q Value 45.052 - Time Delta 46.028 - Time 2023-11-04T01:09:33


 48%|████▊     | 19340/40000 [14:50:02<12:29:26,  2.18s/it]

Episode 19340 - Step 3705655 - Epsilon 0.3959711729606411 - Mean Reward 802.39 - Mean Length 178.82 - Mean Loss 1.971 - Mean Q Value 45.163 - Time Delta 50.486 - Time 2023-11-04T01:10:23


 48%|████▊     | 19360/40000 [14:50:53<13:55:55,  2.43s/it]

Episode 19360 - Step 3709302 - Epsilon 0.39561031073160347 - Mean Reward 800.67 - Mean Length 175.76 - Mean Loss 1.972 - Mean Q Value 45.182 - Time Delta 51.78 - Time 2023-11-04T01:11:15


 48%|████▊     | 19380/40000 [14:51:40<11:07:21,  1.94s/it]

Episode 19380 - Step 3712698 - Epsilon 0.3952745800736097 - Mean Reward 811.65 - Mean Length 180.06 - Mean Loss 1.983 - Mean Q Value 45.249 - Time Delta 48.221 - Time 2023-11-04T01:12:03


 48%|████▊     | 19400/40000 [14:52:37<14:33:05,  2.54s/it]

Episode 19400 - Step 3716671 - Epsilon 0.3948821684615227 - Mean Reward 792.3 - Mean Length 177.92 - Mean Loss 1.996 - Mean Q Value 45.202 - Time Delta 55.941 - Time 2023-11-04T01:12:59


 49%|████▊     | 19401/40000 [14:52:41<16:36:11,  2.90s/it]

MarioNet saved to checkpoints2/mario_net_7.chkpt at step 3716671


 49%|████▊     | 19420/40000 [14:53:24<13:11:32,  2.31s/it]

Episode 19420 - Step 3719904 - Epsilon 0.3945631338561675 - Mean Reward 807.1 - Mean Length 177.96 - Mean Loss 1.973 - Mean Q Value 45.339 - Time Delta 45.812 - Time 2023-11-04T01:13:45


 49%|████▊     | 19440/40000 [14:54:13<16:06:40,  2.82s/it]

Episode 19440 - Step 3723240 - Epsilon 0.39423420534362524 - Mean Reward 813.74 - Mean Length 175.85 - Mean Loss 2.002 - Mean Q Value 45.373 - Time Delta 47.865 - Time 2023-11-04T01:14:33


 49%|████▊     | 19460/40000 [14:55:01<12:38:56,  2.22s/it]

Episode 19460 - Step 3726595 - Epsilon 0.39390367999597625 - Mean Reward 816.84 - Mean Length 172.93 - Mean Loss 1.986 - Mean Q Value 45.523 - Time Delta 47.806 - Time 2023-11-04T01:15:21


 49%|████▊     | 19480/40000 [14:55:49<18:10:21,  3.19s/it]

Episode 19480 - Step 3730123 - Epsilon 0.39355641007531017 - Mean Reward 815.11 - Mean Length 174.25 - Mean Loss 1.965 - Mean Q Value 45.641 - Time Delta 49.991 - Time 2023-11-04T01:16:11


 49%|████▉     | 19500/40000 [14:56:44<10:41:47,  1.88s/it]

Episode 19500 - Step 3733970 - Epsilon 0.3931780891048363 - Mean Reward 800.66 - Mean Length 172.99 - Mean Loss 1.967 - Mean Q Value 45.574 - Time Delta 54.49 - Time 2023-11-04T01:17:05


 49%|████▉     | 19501/40000 [14:56:47<12:13:19,  2.15s/it]

MarioNet saved to checkpoints2/mario_net_7.chkpt at step 3733970


 49%|████▉     | 19520/40000 [14:57:26<13:26:21,  2.36s/it]

Episode 19520 - Step 3736868 - Epsilon 0.3928933347083818 - Mean Reward 785.51 - Mean Length 169.64 - Mean Loss 1.954 - Mean Q Value 45.576 - Time Delta 41.406 - Time 2023-11-04T01:17:47


 49%|████▉     | 19540/40000 [14:58:23<17:06:33,  3.01s/it]

Episode 19540 - Step 3740834 - Epsilon 0.3925039739757759 - Mean Reward 782.88 - Mean Length 175.94 - Mean Loss 1.94 - Mean Q Value 45.51 - Time Delta 56.409 - Time 2023-11-04T01:18:43


 49%|████▉     | 19560/40000 [14:59:17<14:12:40,  2.50s/it]

Episode 19560 - Step 3744565 - Epsilon 0.39213803653885954 - Mean Reward 755.24 - Mean Length 179.7 - Mean Loss 1.931 - Mean Q Value 45.302 - Time Delta 53.502 - Time 2023-11-04T01:19:37


 49%|████▉     | 19580/40000 [15:00:10<18:09:38,  3.20s/it]

Episode 19580 - Step 3748305 - Epsilon 0.3917715587838839 - Mean Reward 770.47 - Mean Length 181.82 - Mean Loss 1.923 - Mean Q Value 45.138 - Time Delta 53.27 - Time 2023-11-04T01:20:30


 49%|████▉     | 19600/40000 [15:00:52<17:01:33,  3.00s/it]

Episode 19600 - Step 3751505 - Epsilon 0.3914582668311426 - Mean Reward 738.28 - Mean Length 175.35 - Mean Loss 1.919 - Mean Q Value 45.146 - Time Delta 46.054 - Time 2023-11-04T01:21:16


 49%|████▉     | 19601/40000 [15:00:58<20:46:16,  3.67s/it]

MarioNet saved to checkpoints2/mario_net_7.chkpt at step 3751505


 49%|████▉     | 19620/40000 [15:01:39<13:27:24,  2.38s/it]

Episode 19620 - Step 3754672 - Epsilon 0.3911484523735915 - Mean Reward 738.44 - Mean Length 178.04 - Mean Loss 1.941 - Mean Q Value 44.969 - Time Delta 45.348 - Time 2023-11-04T01:22:01


 49%|████▉     | 19640/40000 [15:02:21<11:27:48,  2.03s/it]

Episode 19640 - Step 3757576 - Epsilon 0.3908645816191965 - Mean Reward 724.42 - Mean Length 167.42 - Mean Loss 1.957 - Mean Q Value 44.884 - Time Delta 41.523 - Time 2023-11-04T01:22:43


 49%|████▉     | 19660/40000 [15:03:23<14:21:54,  2.54s/it]

Episode 19660 - Step 3761934 - Epsilon 0.3904389665002 - Mean Reward 773.74 - Mean Length 173.69 - Mean Loss 1.966 - Mean Q Value 44.858 - Time Delta 61.823 - Time 2023-11-04T01:23:45


 49%|████▉     | 19680/40000 [15:04:16<14:37:39,  2.59s/it]

Episode 19680 - Step 3765524 - Epsilon 0.39008870468743395 - Mean Reward 760.88 - Mean Length 172.19 - Mean Loss 1.992 - Mean Q Value 44.776 - Time Delta 50.601 - Time 2023-11-04T01:24:35


 49%|████▉     | 19700/40000 [15:05:01<13:06:53,  2.33s/it]

Episode 19700 - Step 3769139 - Epsilon 0.38973632123373037 - Mean Reward 787.64 - Mean Length 176.34 - Mean Loss 1.978 - Mean Q Value 44.752 - Time Delta 51.142 - Time 2023-11-04T01:25:26


 49%|████▉     | 19701/40000 [15:05:08<21:02:36,  3.73s/it]

MarioNet saved to checkpoints2/mario_net_7.chkpt at step 3769139


 49%|████▉     | 19720/40000 [15:05:53<13:24:49,  2.38s/it]

Episode 19720 - Step 3772416 - Epsilon 0.3894171604666998 - Mean Reward 802.85 - Mean Length 177.44 - Mean Loss 1.975 - Mean Q Value 44.752 - Time Delta 46.449 - Time 2023-11-04T01:26:13


 49%|████▉     | 19740/40000 [15:06:37<11:31:54,  2.05s/it]

Episode 19740 - Step 3775709 - Epsilon 0.3890967046749585 - Mean Reward 815.66 - Mean Length 181.33 - Mean Loss 1.955 - Mean Q Value 44.701 - Time Delta 46.435 - Time 2023-11-04T01:26:59


 49%|████▉     | 19760/40000 [15:07:34<14:10:29,  2.52s/it]

Episode 19760 - Step 3779592 - Epsilon 0.3887191722760871 - Mean Reward 780.85 - Mean Length 176.58 - Mean Loss 1.938 - Mean Q Value 44.697 - Time Delta 54.645 - Time 2023-11-04T01:27:54


 49%|████▉     | 19780/40000 [15:08:15<8:23:29,  1.49s/it] 

Episode 19780 - Step 3782519 - Epsilon 0.3884348310322323 - Mean Reward 766.56 - Mean Length 169.95 - Mean Loss 1.958 - Mean Q Value 44.72 - Time Delta 41.266 - Time 2023-11-04T01:28:35


 50%|████▉     | 19800/40000 [15:08:55<9:54:02,  1.76s/it]

Episode 19800 - Step 3785469 - Epsilon 0.38814846591862584 - Mean Reward 742.23 - Mean Length 163.3 - Mean Loss 1.965 - Mean Q Value 44.835 - Time Delta 42.099 - Time 2023-11-04T01:29:17


 50%|████▉     | 19801/40000 [15:08:59<13:16:03,  2.36s/it]

MarioNet saved to checkpoints2/mario_net_7.chkpt at step 3785469


 50%|████▉     | 19820/40000 [15:09:48<14:18:54,  2.55s/it]

Episode 19820 - Step 3789191 - Epsilon 0.3877874617091663 - Mean Reward 739.15 - Mean Length 167.75 - Mean Loss 1.959 - Mean Q Value 45.09 - Time Delta 52.65 - Time 2023-11-04T01:30:10


 50%|████▉     | 19840/40000 [15:10:34<15:53:19,  2.84s/it]

Episode 19840 - Step 3792412 - Epsilon 0.38747532150887887 - Mean Reward 727.83 - Mean Length 167.03 - Mean Loss 1.97 - Mean Q Value 45.432 - Time Delta 45.39 - Time 2023-11-04T01:30:55


 50%|████▉     | 19860/40000 [15:12:03<32:19:21,  5.78s/it]

Episode 19860 - Step 3798645 - Epsilon 0.386872008192204 - Mean Reward 770.47 - Mean Length 190.53 - Mean Loss 2.011 - Mean Q Value 45.567 - Time Delta 87.656 - Time 2023-11-04T01:32:23


 50%|████▉     | 19880/40000 [15:12:45<11:06:29,  1.99s/it]

Episode 19880 - Step 3801719 - Epsilon 0.3865748112292772 - Mean Reward 793.57 - Mean Length 192.0 - Mean Loss 2.009 - Mean Q Value 45.571 - Time Delta 43.622 - Time 2023-11-04T01:33:07


 50%|████▉     | 19900/40000 [15:13:44<17:55:23,  3.21s/it]

Episode 19900 - Step 3805714 - Epsilon 0.3861889123286136 - Mean Reward 852.78 - Mean Length 202.45 - Mean Loss 2.035 - Mean Q Value 45.528 - Time Delta 56.75 - Time 2023-11-04T01:34:03


 50%|████▉     | 19901/40000 [15:13:45<14:35:21,  2.61s/it]

MarioNet saved to checkpoints2/mario_net_7.chkpt at step 3805714


 50%|████▉     | 19920/40000 [15:14:22<13:08:36,  2.36s/it]

Episode 19920 - Step 3808581 - Epsilon 0.3859122105659155 - Mean Reward 835.6 - Mean Length 193.9 - Mean Loss 2.05 - Mean Q Value 45.342 - Time Delta 40.494 - Time 2023-11-04T01:34:44


 50%|████▉     | 19940/40000 [15:15:16<19:28:09,  3.49s/it]

Episode 19940 - Step 3812332 - Epsilon 0.3855504909729731 - Mean Reward 855.9 - Mean Length 199.2 - Mean Loss 2.058 - Mean Q Value 45.158 - Time Delta 53.22 - Time 2023-11-04T01:35:37


 50%|████▉     | 19960/40000 [15:16:02<11:08:28,  2.00s/it]

Episode 19960 - Step 3815644 - Epsilon 0.3852313872536873 - Mean Reward 819.87 - Mean Length 169.99 - Mean Loss 2.051 - Mean Q Value 45.203 - Time Delta 47.25 - Time 2023-11-04T01:36:24


 50%|████▉     | 19980/40000 [15:16:54<10:07:49,  1.82s/it]

Episode 19980 - Step 3819334 - Epsilon 0.3848761751212803 - Mean Reward 844.03 - Mean Length 176.15 - Mean Loss 2.02 - Mean Q Value 45.409 - Time Delta 52.222 - Time 2023-11-04T01:37:17


 50%|█████     | 20000/40000 [15:17:59<20:47:35,  3.74s/it]

Episode 20000 - Step 3823775 - Epsilon 0.38444910341645805 - Mean Reward 862.25 - Mean Length 180.61 - Mean Loss 2.02 - Mean Q Value 45.486 - Time Delta 62.333 - Time 2023-11-04T01:38:19


 50%|█████     | 20001/40000 [15:18:01<17:51:45,  3.22s/it]

MarioNet saved to checkpoints2/mario_net_7.chkpt at step 3823775


 50%|█████     | 20020/40000 [15:18:57<16:07:15,  2.90s/it]

Episode 20020 - Step 3827982 - Epsilon 0.38404497158144957 - Mean Reward 897.06 - Mean Length 194.01 - Mean Loss 1.994 - Mean Q Value 45.621 - Time Delta 59.627 - Time 2023-11-04T01:39:18


 50%|█████     | 20040/40000 [15:19:56<18:28:30,  3.33s/it]

Episode 20040 - Step 3832016 - Epsilon 0.38365785741429326 - Mean Reward 925.11 - Mean Length 196.84 - Mean Loss 2.007 - Mean Q Value 45.619 - Time Delta 58.354 - Time 2023-11-04T01:40:17


 50%|█████     | 20060/40000 [15:20:53<13:07:53,  2.37s/it]

Episode 20060 - Step 3835964 - Epsilon 0.38327937387401534 - Mean Reward 932.23 - Mean Length 203.2 - Mean Loss 2.007 - Mean Q Value 45.563 - Time Delta 57.168 - Time 2023-11-04T01:41:14


 50%|█████     | 20080/40000 [15:21:37<11:06:31,  2.01s/it]

Episode 20080 - Step 3838899 - Epsilon 0.38299824574974817 - Mean Reward 891.78 - Mean Length 195.65 - Mean Loss 2.027 - Mean Q Value 45.324 - Time Delta 42.258 - Time 2023-11-04T01:41:56


 50%|█████     | 20100/40000 [15:22:23<13:39:05,  2.47s/it]

Episode 20100 - Step 3842100 - Epsilon 0.38269187396860815 - Mean Reward 825.91 - Mean Length 183.25 - Mean Loss 2.022 - Mean Q Value 45.254 - Time Delta 45.583 - Time 2023-11-04T01:42:42


 50%|█████     | 20101/40000 [15:22:24<11:17:09,  2.04s/it]

MarioNet saved to checkpoints2/mario_net_7.chkpt at step 3842100


 50%|█████     | 20120/40000 [15:23:11<13:52:25,  2.51s/it]

Episode 20120 - Step 3845657 - Epsilon 0.3823517164420706 - Mean Reward 816.52 - Mean Length 176.75 - Mean Loss 2.033 - Mean Q Value 45.312 - Time Delta 51.188 - Time 2023-11-04T01:43:33


 50%|█████     | 20140/40000 [15:24:12<19:13:05,  3.48s/it]

Episode 20140 - Step 3849836 - Epsilon 0.3819524630326699 - Mean Reward 798.05 - Mean Length 178.2 - Mean Loss 2.0 - Mean Q Value 45.363 - Time Delta 59.902 - Time 2023-11-04T01:44:33


 50%|█████     | 20160/40000 [15:24:58<14:32:55,  2.64s/it]

Episode 20160 - Step 3853119 - Epsilon 0.38163910412167634 - Mean Reward 795.29 - Mean Length 171.55 - Mean Loss 1.979 - Mean Q Value 45.545 - Time Delta 46.999 - Time 2023-11-04T01:45:20


 50%|█████     | 20180/40000 [15:25:59<11:12:09,  2.03s/it]

Episode 20180 - Step 3857296 - Epsilon 0.38124078544568685 - Mean Reward 845.55 - Mean Length 183.97 - Mean Loss 1.955 - Mean Q Value 45.935 - Time Delta 60.419 - Time 2023-11-04T01:46:20


 50%|█████     | 20200/40000 [15:26:59<13:45:16,  2.50s/it]

Episode 20200 - Step 3861420 - Epsilon 0.380847928699184 - Mean Reward 848.58 - Mean Length 193.2 - Mean Loss 1.961 - Mean Q Value 46.034 - Time Delta 59.124 - Time 2023-11-04T01:47:19


 51%|█████     | 20201/40000 [15:27:01<13:14:03,  2.41s/it]

MarioNet saved to checkpoints2/mario_net_7.chkpt at step 3861420


 51%|█████     | 20220/40000 [15:27:47<15:03:26,  2.74s/it]

Episode 20220 - Step 3864986 - Epsilon 0.3805085540270174 - Mean Reward 852.98 - Mean Length 193.29 - Mean Loss 1.978 - Mean Q Value 46.024 - Time Delta 51.375 - Time 2023-11-04T01:48:11


 51%|█████     | 20240/40000 [15:28:40<9:57:59,  1.82s/it] 

Episode 20240 - Step 3868653 - Epsilon 0.38015988261315853 - Mean Reward 820.05 - Mean Length 188.17 - Mean Loss 2.001 - Mean Q Value 46.049 - Time Delta 52.923 - Time 2023-11-04T01:49:04


 51%|█████     | 20260/40000 [15:29:40<19:08:55,  3.49s/it]

Episode 20260 - Step 3872518 - Epsilon 0.37979273048957163 - Mean Reward 848.92 - Mean Length 193.99 - Mean Loss 2.047 - Mean Q Value 45.995 - Time Delta 55.532 - Time 2023-11-04T01:49:59


 51%|█████     | 20280/40000 [15:30:33<16:02:48,  2.93s/it]

Episode 20280 - Step 3876206 - Epsilon 0.3794427229265411 - Mean Reward 832.35 - Mean Length 189.1 - Mean Loss 2.078 - Mean Q Value 45.944 - Time Delta 53.475 - Time 2023-11-04T01:50:53


 51%|█████     | 20300/40000 [15:31:29<14:00:00,  2.56s/it]

Episode 20300 - Step 3880352 - Epsilon 0.379049634248438 - Mean Reward 876.35 - Mean Length 189.32 - Mean Loss 2.072 - Mean Q Value 46.049 - Time Delta 59.447 - Time 2023-11-04T01:51:52


 51%|█████     | 20301/40000 [15:31:34<17:39:56,  3.23s/it]

MarioNet saved to checkpoints2/mario_net_7.chkpt at step 3880352


 51%|█████     | 20320/40000 [15:32:32<15:32:04,  2.84s/it]

Episode 20320 - Step 3884610 - Epsilon 0.37864635054814416 - Mean Reward 917.43 - Mean Length 196.24 - Mean Loss 2.074 - Mean Q Value 46.149 - Time Delta 61.181 - Time 2023-11-04T01:52:53


 51%|█████     | 20340/40000 [15:33:28<8:31:37,  1.56s/it]

Episode 20340 - Step 3888371 - Epsilon 0.3782904955950377 - Mean Reward 929.84 - Mean Length 197.18 - Mean Loss 2.066 - Mean Q Value 46.278 - Time Delta 55.762 - Time 2023-11-04T01:53:49


 51%|█████     | 20360/40000 [15:34:15<17:57:25,  3.29s/it]

Episode 20360 - Step 3891598 - Epsilon 0.3779854327708036 - Mean Reward 898.46 - Mean Length 190.8 - Mean Loss 2.046 - Mean Q Value 46.388 - Time Delta 46.356 - Time 2023-11-04T01:54:36


 51%|█████     | 20380/40000 [15:35:09<18:45:18,  3.44s/it]

Episode 20380 - Step 3895513 - Epsilon 0.3776156604690443 - Mean Reward 917.5 - Mean Length 193.07 - Mean Loss 2.057 - Mean Q Value 46.497 - Time Delta 55.573 - Time 2023-11-04T01:55:31


 51%|█████     | 20400/40000 [15:36:04<9:10:41,  1.69s/it]

Episode 20400 - Step 3899422 - Epsilon 0.3772468157747888 - Mean Reward 895.44 - Mean Length 190.7 - Mean Loss 2.051 - Mean Q Value 46.756 - Time Delta 55.26 - Time 2023-11-04T01:56:26


 51%|█████     | 20401/40000 [15:36:08<12:16:01,  2.25s/it]

MarioNet saved to checkpoints2/mario_net_7.chkpt at step 3899422


 51%|█████     | 20420/40000 [15:36:43<10:59:13,  2.02s/it]

Episode 20420 - Step 3902282 - Epsilon 0.37697718067380387 - Mean Reward 825.7 - Mean Length 176.72 - Mean Loss 2.07 - Mean Q Value 46.961 - Time Delta 40.466 - Time 2023-11-04T01:57:07


 51%|█████     | 20440/40000 [15:37:44<17:59:26,  3.31s/it]

Episode 20440 - Step 3906438 - Epsilon 0.37658570474105957 - Mean Reward 859.12 - Mean Length 180.67 - Mean Loss 2.094 - Mean Q Value 47.115 - Time Delta 58.567 - Time 2023-11-04T01:58:05


 51%|█████     | 20460/40000 [15:38:41<13:59:51,  2.58s/it]

Episode 20460 - Step 3910437 - Epsilon 0.3762094012716938 - Mean Reward 876.97 - Mean Length 188.39 - Mean Loss 2.114 - Mean Q Value 47.276 - Time Delta 56.266 - Time 2023-11-04T01:59:02


 51%|█████     | 20480/40000 [15:39:34<14:21:04,  2.65s/it]

Episode 20480 - Step 3914294 - Epsilon 0.3758468161505884 - Mean Reward 854.17 - Mean Length 187.81 - Mean Loss 2.106 - Mean Q Value 47.399 - Time Delta 54.527 - Time 2023-11-04T01:59:56


 51%|█████▏    | 20500/40000 [15:40:26<12:05:34,  2.23s/it]

Episode 20500 - Step 3917921 - Epsilon 0.375506171470539 - Mean Reward 854.64 - Mean Length 184.99 - Mean Loss 2.121 - Mean Q Value 47.299 - Time Delta 51.275 - Time 2023-11-04T02:00:47


 51%|█████▏    | 20501/40000 [15:40:29<14:20:08,  2.65s/it]

MarioNet saved to checkpoints2/mario_net_7.chkpt at step 3917921


 51%|█████▏    | 20520/40000 [15:41:18<15:30:28,  2.87s/it]

Episode 20520 - Step 3921657 - Epsilon 0.3751556123990874 - Mean Reward 880.71 - Mean Length 193.75 - Mean Loss 2.113 - Mean Q Value 47.164 - Time Delta 52.894 - Time 2023-11-04T02:01:40


 51%|█████▏    | 20540/40000 [15:42:10<9:22:36,  1.73s/it] 

Episode 20540 - Step 3925140 - Epsilon 0.3748290877900127 - Mean Reward 856.84 - Mean Length 187.02 - Mean Loss 2.121 - Mean Q Value 47.102 - Time Delta 49.55 - Time 2023-11-04T02:02:30


 51%|█████▏    | 20560/40000 [15:43:02<15:55:43,  2.95s/it]

Episode 20560 - Step 3928798 - Epsilon 0.3744864632353084 - Mean Reward 856.71 - Mean Length 183.61 - Mean Loss 2.106 - Mean Q Value 47.015 - Time Delta 52.263 - Time 2023-11-04T02:03:22


 51%|█████▏    | 20580/40000 [15:43:55<9:52:29,  1.83s/it] 

Episode 20580 - Step 3932719 - Epsilon 0.3741195526952165 - Mean Reward 874.98 - Mean Length 184.25 - Mean Loss 2.107 - Mean Q Value 46.989 - Time Delta 55.402 - Time 2023-11-04T02:04:18


 52%|█████▏    | 20600/40000 [15:44:51<13:40:07,  2.54s/it]

Episode 20600 - Step 3936586 - Epsilon 0.3737580473432152 - Mean Reward 889.13 - Mean Length 186.65 - Mean Loss 2.131 - Mean Q Value 46.953 - Time Delta 54.728 - Time 2023-11-04T02:05:12


 52%|█████▏    | 20601/40000 [15:44:54<14:13:14,  2.64s/it]

MarioNet saved to checkpoints2/mario_net_7.chkpt at step 3936586


 52%|█████▏    | 20620/40000 [15:45:50<16:45:31,  3.11s/it]

Episode 20620 - Step 3940675 - Epsilon 0.37337616835268406 - Mean Reward 900.27 - Mean Length 190.18 - Mean Loss 2.106 - Mean Q Value 47.042 - Time Delta 58.13 - Time 2023-11-04T02:06:10


 52%|█████▏    | 20640/40000 [15:46:38<11:02:10,  2.05s/it]

Episode 20640 - Step 3944313 - Epsilon 0.37303673706486734 - Mean Reward 906.7 - Mean Length 191.73 - Mean Loss 2.079 - Mean Q Value 47.097 - Time Delta 52.275 - Time 2023-11-04T02:07:03


 52%|█████▏    | 20660/40000 [15:47:34<13:28:32,  2.51s/it]

Episode 20660 - Step 3948022 - Epsilon 0.37269099902473063 - Mean Reward 898.49 - Mean Length 192.24 - Mean Loss 2.084 - Mean Q Value 47.135 - Time Delta 53.537 - Time 2023-11-04T02:07:56


 52%|█████▏    | 20680/40000 [15:48:29<12:50:19,  2.39s/it]

Episode 20680 - Step 3951788 - Epsilon 0.3723402755342504 - Mean Reward 900.99 - Mean Length 190.69 - Mean Loss 2.085 - Mean Q Value 47.109 - Time Delta 54.371 - Time 2023-11-04T02:08:51


 52%|█████▏    | 20700/40000 [15:49:23<14:32:35,  2.71s/it]

Episode 20700 - Step 3955350 - Epsilon 0.37200885406472284 - Mean Reward 890.62 - Mean Length 187.64 - Mean Loss 2.073 - Mean Q Value 47.157 - Time Delta 52.083 - Time 2023-11-04T02:09:43


 52%|█████▏    | 20701/40000 [15:49:24<11:57:55,  2.23s/it]

MarioNet saved to checkpoints2/mario_net_7.chkpt at step 3955350


 52%|█████▏    | 20720/40000 [15:50:06<11:51:26,  2.21s/it]

Episode 20720 - Step 3958457 - Epsilon 0.3717200083461614 - Mean Reward 863.77 - Mean Length 177.82 - Mean Loss 2.067 - Mean Q Value 47.159 - Time Delta 44.686 - Time 2023-11-04T02:10:27


 52%|█████▏    | 20740/40000 [15:51:01<8:43:11,  1.63s/it] 

Episode 20740 - Step 3962382 - Epsilon 0.3713554369394478 - Mean Reward 846.87 - Mean Length 180.69 - Mean Loss 2.08 - Mean Q Value 47.183 - Time Delta 56.279 - Time 2023-11-04T02:11:24


 52%|█████▏    | 20760/40000 [15:51:57<16:35:10,  3.10s/it]

Episode 20760 - Step 3966285 - Epsilon 0.3709932635506079 - Mean Reward 860.37 - Mean Length 182.63 - Mean Loss 2.074 - Mean Q Value 47.193 - Time Delta 55.741 - Time 2023-11-04T02:12:19


 52%|█████▏    | 20780/40000 [15:52:57<12:14:20,  2.29s/it]

Episode 20780 - Step 3970264 - Epsilon 0.37062440144873615 - Mean Reward 833.27 - Mean Length 184.76 - Mean Loss 2.061 - Mean Q Value 47.275 - Time Delta 57.024 - Time 2023-11-04T02:13:16


 52%|█████▏    | 20800/40000 [15:53:37<6:25:22,  1.20s/it]

Episode 20800 - Step 3973193 - Epsilon 0.37035311103515167 - Mean Reward 801.53 - Mean Length 178.43 - Mean Loss 2.022 - Mean Q Value 47.442 - Time Delta 41.741 - Time 2023-11-04T02:13:58


 52%|█████▏    | 20801/40000 [15:53:40<8:41:09,  1.63s/it]

MarioNet saved to checkpoints2/mario_net_7.chkpt at step 3973193


 52%|█████▏    | 20820/40000 [15:55:00<17:56:40,  3.37s/it]

Episode 20820 - Step 3979245 - Epsilon 0.3697931903948653 - Mean Reward 811.47 - Mean Length 207.88 - Mean Loss 2.023 - Mean Q Value 47.448 - Time Delta 86.43 - Time 2023-11-04T02:15:25


 52%|█████▏    | 20840/40000 [15:55:58<18:35:22,  3.49s/it]

Episode 20840 - Step 3983025 - Epsilon 0.36944390085153184 - Mean Reward 830.62 - Mean Length 206.43 - Mean Loss 2.036 - Mean Q Value 47.347 - Time Delta 54.184 - Time 2023-11-04T02:16:19


 52%|█████▏    | 20860/40000 [15:56:49<12:28:40,  2.35s/it]

Episode 20860 - Step 3986498 - Epsilon 0.3691232703583496 - Mean Reward 792.62 - Mean Length 202.13 - Mean Loss 2.01 - Mean Q Value 47.16 - Time Delta 50.319 - Time 2023-11-04T02:17:09


 52%|█████▏    | 20880/40000 [15:57:39<10:56:09,  2.06s/it]

Episode 20880 - Step 3990720 - Epsilon 0.36873386624193133 - Mean Reward 795.82 - Mean Length 204.56 - Mean Loss 2.012 - Mean Q Value 46.92 - Time Delta 60.687 - Time 2023-11-04T02:18:10


 52%|█████▏    | 20900/40000 [15:58:45<11:36:54,  2.19s/it]

Episode 20900 - Step 3994585 - Epsilon 0.36837774917587596 - Mean Reward 855.71 - Mean Length 213.92 - Mean Loss 2.027 - Mean Q Value 46.576 - Time Delta 56.208 - Time 2023-11-04T02:19:06


 52%|█████▏    | 20901/40000 [15:58:48<12:47:20,  2.41s/it]

MarioNet saved to checkpoints2/mario_net_7.chkpt at step 3994585


 52%|█████▏    | 20920/40000 [15:59:52<9:52:59,  1.86s/it] 

Episode 20920 - Step 3999252 - Epsilon 0.3679481950231542 - Mean Reward 831.73 - Mean Length 200.07 - Mean Loss 2.02 - Mean Q Value 46.141 - Time Delta 67.023 - Time 2023-11-04T02:20:13


 52%|█████▏    | 20923/40000 [16:00:04<17:50:36,  3.37s/it]

MarioNet saved to checkpoints2/mario_net_8.chkpt at step 4000000


 52%|█████▏    | 20940/40000 [16:00:58<17:30:35,  3.31s/it]

Episode 20940 - Step 4003769 - Epsilon 0.3675329239884928 - Mean Reward 851.54 - Mean Length 207.44 - Mean Loss 1.992 - Mean Q Value 45.839 - Time Delta 65.767 - Time 2023-11-04T02:21:19


 52%|█████▏    | 20960/40000 [16:02:21<22:39:58,  4.29s/it]

Episode 20960 - Step 4009450 - Epsilon 0.36701130578972596 - Mean Reward 897.09 - Mean Length 229.52 - Mean Loss 1.979 - Mean Q Value 45.579 - Time Delta 81.631 - Time 2023-11-04T02:22:40


 52%|█████▏    | 20980/40000 [16:03:13<15:54:34,  3.01s/it]

Episode 20980 - Step 4013157 - Epsilon 0.3666713355775727 - Mean Reward 912.82 - Mean Length 224.37 - Mean Loss 1.929 - Mean Q Value 45.23 - Time Delta 53.67 - Time 2023-11-04T02:23:34


 52%|█████▎    | 21000/40000 [16:04:25<16:59:08,  3.22s/it]

Episode 21000 - Step 4018123 - Epsilon 0.3662163955199975 - Mean Reward 892.47 - Mean Length 235.38 - Mean Loss 1.874 - Mean Q Value 44.877 - Time Delta 72.846 - Time 2023-11-04T02:24:47


 53%|█████▎    | 21001/40000 [16:04:29<17:08:34,  3.25s/it]

MarioNet saved to checkpoints2/mario_net_8.chkpt at step 4018123


 53%|█████▎    | 21020/40000 [16:06:03<15:50:25,  3.00s/it]

Episode 21020 - Step 4024695 - Epsilon 0.3656151959276332 - Mean Reward 937.3 - Mean Length 254.43 - Mean Loss 1.84 - Mean Q Value 44.521 - Time Delta 95.798 - Time 2023-11-04T02:26:23


 53%|█████▎    | 21040/40000 [16:07:34<21:03:22,  4.00s/it]

Episode 21040 - Step 4031070 - Epsilon 0.3650329607281773 - Mean Reward 919.38 - Mean Length 273.01 - Mean Loss 1.775 - Mean Q Value 43.855 - Time Delta 93.117 - Time 2023-11-04T02:27:56


 53%|█████▎    | 21060/40000 [16:08:46<18:17:50,  3.48s/it]

Episode 21060 - Step 4035853 - Epsilon 0.36459673337220544 - Mean Reward 913.93 - Mean Length 264.03 - Mean Loss 1.72 - Mean Q Value 43.236 - Time Delta 73.041 - Time 2023-11-04T02:29:09


 53%|█████▎    | 21080/40000 [16:09:36<8:18:16,  1.58s/it] 

Episode 21080 - Step 4038798 - Epsilon 0.36432839778683906 - Mean Reward 865.21 - Mean Length 256.41 - Mean Loss 1.681 - Mean Q Value 42.677 - Time Delta 46.109 - Time 2023-11-04T02:29:55


 53%|█████▎    | 21100/40000 [16:10:28<8:38:29,  1.65s/it]

Episode 21100 - Step 4042482 - Epsilon 0.3639930057622415 - Mean Reward 842.43 - Mean Length 243.59 - Mean Loss 1.676 - Mean Q Value 42.012 - Time Delta 55.509 - Time 2023-11-04T02:30:51


 53%|█████▎    | 21101/40000 [16:10:32<12:11:50,  2.32s/it]

MarioNet saved to checkpoints2/mario_net_8.chkpt at step 4042482


 53%|█████▎    | 21120/40000 [16:11:21<14:14:37,  2.72s/it]

Episode 21120 - Step 4045968 - Epsilon 0.3636759240064848 - Mean Reward 804.07 - Mean Length 212.73 - Mean Loss 1.676 - Mean Q Value 41.513 - Time Delta 52.915 - Time 2023-11-04T02:31:43


 53%|█████▎    | 21140/40000 [16:12:08<13:27:31,  2.57s/it]

Episode 21140 - Step 4048882 - Epsilon 0.3634110825429204 - Mean Reward 770.96 - Mean Length 178.12 - Mean Loss 1.673 - Mean Q Value 41.303 - Time Delta 44.238 - Time 2023-11-04T02:32:28


 53%|█████▎    | 21160/40000 [16:13:03<13:35:13,  2.60s/it]

Episode 21160 - Step 4052821 - Epsilon 0.363053389582601 - Mean Reward 753.83 - Mean Length 169.68 - Mean Loss 1.714 - Mean Q Value 41.134 - Time Delta 57.392 - Time 2023-11-04T02:33:25


 53%|█████▎    | 21180/40000 [16:13:47<12:55:42,  2.47s/it]

Episode 21180 - Step 4055834 - Epsilon 0.3627800225524721 - Mean Reward 764.68 - Mean Length 170.36 - Mean Loss 1.743 - Mean Q Value 40.977 - Time Delta 43.642 - Time 2023-11-04T02:34:09


 53%|█████▎    | 21200/40000 [16:14:47<11:27:14,  2.19s/it]

Episode 21200 - Step 4059878 - Epsilon 0.3624134372438951 - Mean Reward 768.66 - Mean Length 173.96 - Mean Loss 1.763 - Mean Q Value 40.901 - Time Delta 58.729 - Time 2023-11-04T02:35:07


 53%|█████▎    | 21201/40000 [16:14:49<11:21:21,  2.17s/it]

MarioNet saved to checkpoints2/mario_net_8.chkpt at step 4059878


 53%|█████▎    | 21220/40000 [16:15:29<9:20:47,  1.79s/it] 

Episode 21220 - Step 4062788 - Epsilon 0.362149877316866 - Mean Reward 764.79 - Mean Length 168.2 - Mean Loss 1.785 - Mean Q Value 40.877 - Time Delta 43.0 - Time 2023-11-04T02:35:50


 53%|█████▎    | 21240/40000 [16:16:14<12:50:06,  2.46s/it]

Episode 21240 - Step 4065694 - Epsilon 0.36188687094671534 - Mean Reward 764.93 - Mean Length 168.12 - Mean Loss 1.798 - Mean Q Value 40.779 - Time Delta 42.493 - Time 2023-11-04T02:36:33


 53%|█████▎    | 21260/40000 [16:17:03<9:04:31,  1.74s/it] 

Episode 21260 - Step 4069333 - Epsilon 0.36155779403624677 - Mean Reward 776.34 - Mean Length 165.12 - Mean Loss 1.792 - Mean Q Value 40.827 - Time Delta 52.847 - Time 2023-11-04T02:37:26


 53%|█████▎    | 21280/40000 [16:18:16<16:45:40,  3.22s/it]

Episode 21280 - Step 4074176 - Epsilon 0.3611203027820818 - Mean Reward 839.72 - Mean Length 183.42 - Mean Loss 1.797 - Mean Q Value 40.874 - Time Delta 70.977 - Time 2023-11-04T02:38:37


 53%|█████▎    | 21300/40000 [16:19:31<15:33:56,  3.00s/it]

Episode 21300 - Step 4079486 - Epsilon 0.3606412335727301 - Mean Reward 878.11 - Mean Length 196.08 - Mean Loss 1.776 - Mean Q Value 40.98 - Time Delta 75.482 - Time 2023-11-04T02:39:52


 53%|█████▎    | 21301/40000 [16:19:34<15:10:35,  2.92s/it]

MarioNet saved to checkpoints2/mario_net_8.chkpt at step 4079486


 53%|█████▎    | 21320/40000 [16:21:02<18:39:55,  3.60s/it]

Episode 21320 - Step 4085849 - Epsilon 0.36006799951560875 - Mean Reward 871.57 - Mean Length 230.61 - Mean Loss 1.747 - Mean Q Value 40.991 - Time Delta 88.959 - Time 2023-11-04T02:41:21


 53%|█████▎    | 21340/40000 [16:21:41<10:15:37,  1.98s/it]

Episode 21340 - Step 4088762 - Epsilon 0.3598058754196808 - Mean Reward 862.86 - Mean Length 230.68 - Mean Loss 1.725 - Mean Q Value 40.956 - Time Delta 40.915 - Time 2023-11-04T02:42:02


 53%|█████▎    | 21360/40000 [16:22:41<13:01:26,  2.52s/it]

Episode 21360 - Step 4092893 - Epsilon 0.3594344776688879 - Mean Reward 849.45 - Mean Length 235.6 - Mean Loss 1.721 - Mean Q Value 40.556 - Time Delta 58.472 - Time 2023-11-04T02:43:01


 53%|█████▎    | 21380/40000 [16:23:50<12:53:21,  2.49s/it]

Episode 21380 - Step 4098077 - Epsilon 0.35896895225319936 - Mean Reward 827.1 - Mean Length 239.01 - Mean Loss 1.678 - Mean Q Value 40.203 - Time Delta 73.436 - Time 2023-11-04T02:44:14


 54%|█████▎    | 21400/40000 [16:24:44<14:25:34,  2.79s/it]

Episode 21400 - Step 4101563 - Epsilon 0.35865624705326304 - Mean Reward 784.2 - Mean Length 220.77 - Mean Loss 1.661 - Mean Q Value 39.906 - Time Delta 49.341 - Time 2023-11-04T02:45:03


 54%|█████▎    | 21401/40000 [16:24:45<11:42:30,  2.27s/it]

MarioNet saved to checkpoints2/mario_net_8.chkpt at step 4101563


 54%|█████▎    | 21420/40000 [16:25:35<11:50:54,  2.30s/it]

Episode 21420 - Step 4105375 - Epsilon 0.3583146104227801 - Mean Reward 835.79 - Mean Length 195.26 - Mean Loss 1.636 - Mean Q Value 39.667 - Time Delta 54.353 - Time 2023-11-04T02:45:58


 54%|█████▎    | 21440/40000 [16:26:32<12:20:16,  2.39s/it]

Episode 21440 - Step 4109366 - Epsilon 0.35795728026823925 - Mean Reward 883.44 - Mean Length 206.04 - Mean Loss 1.627 - Mean Q Value 39.655 - Time Delta 56.13 - Time 2023-11-04T02:46:54


 54%|█████▎    | 21460/40000 [16:27:18<9:13:56,  1.79s/it]

Episode 21460 - Step 4112470 - Epsilon 0.35767961313278696 - Mean Reward 869.4 - Mean Length 195.77 - Mean Loss 1.608 - Mean Q Value 39.956 - Time Delta 44.725 - Time 2023-11-04T02:47:39


 54%|█████▎    | 21480/40000 [16:28:07<13:30:40,  2.63s/it]

Episode 21480 - Step 4115929 - Epsilon 0.35737044334519424 - Mean Reward 842.16 - Mean Length 178.52 - Mean Loss 1.612 - Mean Q Value 40.163 - Time Delta 50.365 - Time 2023-11-04T02:48:29


 54%|█████▍    | 21500/40000 [16:28:54<8:09:29,  1.59s/it] 

Episode 21500 - Step 4119120 - Epsilon 0.3570854647243138 - Mean Reward 852.24 - Mean Length 175.57 - Mean Loss 1.613 - Mean Q Value 40.423 - Time Delta 46.328 - Time 2023-11-04T02:49:15


 54%|█████▍    | 21501/40000 [16:28:57<10:11:53,  1.98s/it]

MarioNet saved to checkpoints2/mario_net_8.chkpt at step 4119120


 54%|█████▍    | 21520/40000 [16:29:49<16:43:33,  3.26s/it]

Episode 21520 - Step 4122907 - Epsilon 0.3567475540020293 - Mean Reward 839.34 - Mean Length 175.32 - Mean Loss 1.651 - Mean Q Value 40.796 - Time Delta 55.047 - Time 2023-11-04T02:50:10


 54%|█████▍    | 21540/40000 [16:30:50<16:36:29,  3.24s/it]

Episode 21540 - Step 4127101 - Epsilon 0.3563737001719942 - Mean Reward 840.25 - Mean Length 177.35 - Mean Loss 1.679 - Mean Q Value 41.198 - Time Delta 61.093 - Time 2023-11-04T02:51:11


 54%|█████▍    | 21560/40000 [16:31:43<9:30:33,  1.86s/it]

Episode 21560 - Step 4130734 - Epsilon 0.3560501706634022 - Mean Reward 859.31 - Mean Length 182.64 - Mean Loss 1.714 - Mean Q Value 41.631 - Time Delta 52.81 - Time 2023-11-04T02:52:04


 54%|█████▍    | 21580/40000 [16:32:23<10:08:35,  1.98s/it]

Episode 21580 - Step 4133361 - Epsilon 0.3558164114536486 - Mean Reward 834.77 - Mean Length 174.32 - Mean Loss 1.778 - Mean Q Value 42.159 - Time Delta 39.308 - Time 2023-11-04T02:52:44


 54%|█████▍    | 21600/40000 [16:33:17<13:21:00,  2.61s/it]

Episode 21600 - Step 4136967 - Epsilon 0.3554957874615796 - Mean Reward 834.03 - Mean Length 178.47 - Mean Loss 1.808 - Mean Q Value 42.666 - Time Delta 53.207 - Time 2023-11-04T02:53:37


 54%|█████▍    | 21601/40000 [16:33:18<10:55:14,  2.14s/it]

MarioNet saved to checkpoints2/mario_net_8.chkpt at step 4136967


 54%|█████▍    | 21620/40000 [16:34:11<10:35:46,  2.08s/it]

Episode 21620 - Step 4140584 - Epsilon 0.3551744756505554 - Mean Reward 843.0 - Mean Length 176.77 - Mean Loss 1.811 - Mean Q Value 43.098 - Time Delta 53.357 - Time 2023-11-04T02:54:30


 54%|█████▍    | 21640/40000 [16:35:01<11:14:47,  2.21s/it]

Episode 21640 - Step 4143977 - Epsilon 0.3548733266067942 - Mean Reward 814.63 - Mean Length 168.76 - Mean Loss 1.838 - Mean Q Value 43.418 - Time Delta 50.071 - Time 2023-11-04T02:55:20


 54%|█████▍    | 21660/40000 [16:35:50<9:46:43,  1.92s/it] 

Episode 21660 - Step 4147434 - Epsilon 0.3545667597901778 - Mean Reward 816.61 - Mean Length 167.0 - Mean Loss 1.841 - Mean Q Value 43.641 - Time Delta 50.373 - Time 2023-11-04T02:56:11


 54%|█████▍    | 21680/40000 [16:36:32<6:34:06,  1.29s/it]

Episode 21680 - Step 4150232 - Epsilon 0.3543188270352702 - Mean Reward 810.22 - Mean Length 168.71 - Mean Loss 1.85 - Mean Q Value 43.849 - Time Delta 40.799 - Time 2023-11-04T02:56:51


 54%|█████▍    | 21700/40000 [16:37:32<17:12:24,  3.38s/it]

Episode 21700 - Step 4154782 - Epsilon 0.35391601845991993 - Mean Reward 801.02 - Mean Length 178.15 - Mean Loss 1.875 - Mean Q Value 43.994 - Time Delta 65.141 - Time 2023-11-04T02:57:56


 54%|█████▍    | 21701/40000 [16:37:38<20:51:37,  4.10s/it]

MarioNet saved to checkpoints2/mario_net_8.chkpt at step 4154782


 54%|█████▍    | 21720/40000 [16:38:35<25:39:24,  5.05s/it]

Episode 21720 - Step 4158872 - Epsilon 0.35355432423346866 - Mean Reward 787.26 - Mean Length 182.88 - Mean Loss 1.886 - Mean Q Value 44.023 - Time Delta 58.746 - Time 2023-11-04T02:58:55


 54%|█████▍    | 21740/40000 [16:39:21<8:08:52,  1.61s/it] 

Episode 21740 - Step 4162551 - Epsilon 0.35322929210014475 - Mean Reward 789.55 - Mean Length 185.74 - Mean Loss 1.932 - Mean Q Value 44.103 - Time Delta 52.017 - Time 2023-11-04T02:59:47


 54%|█████▍    | 21760/40000 [16:40:11<11:33:51,  2.28s/it]

Episode 21760 - Step 4165676 - Epsilon 0.3529534394501417 - Mean Reward 780.12 - Mean Length 182.42 - Mean Loss 1.964 - Mean Q Value 44.079 - Time Delta 45.086 - Time 2023-11-04T03:00:32


 54%|█████▍    | 21780/40000 [16:41:07<14:41:36,  2.90s/it]

Episode 21780 - Step 4169590 - Epsilon 0.35260824338116115 - Mean Reward 812.96 - Mean Length 193.58 - Mean Loss 1.973 - Mean Q Value 44.02 - Time Delta 55.802 - Time 2023-11-04T03:01:28


 55%|█████▍    | 21800/40000 [16:41:54<11:49:51,  2.34s/it]

Episode 21800 - Step 4172718 - Epsilon 0.35233261148646083 - Mean Reward 803.95 - Mean Length 179.36 - Mean Loss 2.01 - Mean Q Value 43.893 - Time Delta 44.821 - Time 2023-11-04T03:02:13


 55%|█████▍    | 21801/40000 [16:41:55<9:52:06,  1.95s/it] 

MarioNet saved to checkpoints2/mario_net_8.chkpt at step 4172718


 55%|█████▍    | 21820/40000 [16:42:43<7:46:13,  1.54s/it]

Episode 21820 - Step 4176188 - Epsilon 0.35202709544450705 - Mean Reward 799.36 - Mean Length 173.16 - Mean Loss 2.02 - Mean Q Value 43.949 - Time Delta 50.656 - Time 2023-11-04T03:03:04


 55%|█████▍    | 21840/40000 [16:43:38<13:09:18,  2.61s/it]

Episode 21840 - Step 4180145 - Epsilon 0.35167902478961544 - Mean Reward 827.54 - Mean Length 175.94 - Mean Loss 1.996 - Mean Q Value 43.999 - Time Delta 57.99 - Time 2023-11-04T03:04:02


 55%|█████▍    | 21860/40000 [16:44:27<9:44:28,  1.93s/it] 

Episode 21860 - Step 4183318 - Epsilon 0.3514001659853002 - Mean Reward 830.83 - Mean Length 176.42 - Mean Loss 2.019 - Mean Q Value 44.212 - Time Delta 47.116 - Time 2023-11-04T03:04:49


 55%|█████▍    | 21880/40000 [16:45:27<13:35:06,  2.70s/it]

Episode 21880 - Step 4187380 - Epsilon 0.3510435001998128 - Mean Reward 878.14 - Mean Length 177.9 - Mean Loss 2.032 - Mean Q Value 44.636 - Time Delta 59.798 - Time 2023-11-04T03:05:48


 55%|█████▍    | 21900/40000 [16:46:30<14:40:54,  2.92s/it]

Episode 21900 - Step 4191576 - Epsilon 0.350675448598859 - Mean Reward 910.12 - Mean Length 188.58 - Mean Loss 2.027 - Mean Q Value 45.188 - Time Delta 61.887 - Time 2023-11-04T03:06:50


 55%|█████▍    | 21901/40000 [16:46:32<13:07:54,  2.61s/it]

MarioNet saved to checkpoints2/mario_net_8.chkpt at step 4191576


 55%|█████▍    | 21920/40000 [16:47:13<10:23:22,  2.07s/it]

Episode 21920 - Step 4194620 - Epsilon 0.35040868606511116 - Mean Reward 905.91 - Mean Length 184.32 - Mean Loss 2.044 - Mean Q Value 45.85 - Time Delta 45.075 - Time 2023-11-04T03:07:35


 55%|█████▍    | 21940/40000 [16:48:09<14:07:36,  2.82s/it]

Episode 21940 - Step 4198214 - Epsilon 0.3500939852218245 - Mean Reward 887.17 - Mean Length 180.69 - Mean Loss 2.055 - Mean Q Value 46.413 - Time Delta 53.467 - Time 2023-11-04T03:08:29


 55%|█████▍    | 21960/40000 [16:49:14<19:41:38,  3.93s/it]

Episode 21960 - Step 4202526 - Epsilon 0.34971678720480887 - Mean Reward 940.54 - Mean Length 192.08 - Mean Loss 2.051 - Mean Q Value 46.982 - Time Delta 64.757 - Time 2023-11-04T03:09:34


 55%|█████▍    | 21980/40000 [16:50:09<13:26:51,  2.69s/it]

Episode 21980 - Step 4206425 - Epsilon 0.3493760668094243 - Mean Reward 913.02 - Mean Length 190.45 - Mean Loss 2.035 - Mean Q Value 47.383 - Time Delta 58.706 - Time 2023-11-04T03:10:32


 55%|█████▌    | 22000/40000 [16:51:01<10:12:03,  2.04s/it]

Episode 22000 - Step 4209709 - Epsilon 0.3490893467373876 - Mean Reward 903.8 - Mean Length 181.33 - Mean Loss 2.033 - Mean Q Value 47.613 - Time Delta 49.658 - Time 2023-11-04T03:11:22


 55%|█████▌    | 22001/40000 [16:51:04<11:49:30,  2.37s/it]

MarioNet saved to checkpoints2/mario_net_8.chkpt at step 4209709


 55%|█████▌    | 22020/40000 [16:51:50<14:33:50,  2.92s/it]

Episode 22020 - Step 4212903 - Epsilon 0.3488107101196254 - Mean Reward 893.9 - Mean Length 182.83 - Mean Loss 2.054 - Mean Q Value 47.696 - Time Delta 48.739 - Time 2023-11-04T03:12:11


 55%|█████▌    | 22040/40000 [16:52:58<17:15:41,  3.46s/it]

Episode 22040 - Step 4217587 - Epsilon 0.34840249178547245 - Mean Reward 906.6 - Mean Length 193.73 - Mean Loss 2.051 - Mean Q Value 47.807 - Time Delta 69.869 - Time 2023-11-04T03:13:21


 55%|█████▌    | 22060/40000 [16:54:02<20:10:24,  4.05s/it]

Episode 22060 - Step 4221928 - Epsilon 0.3480245930283717 - Mean Reward 888.62 - Mean Length 194.02 - Mean Loss 1.999 - Mean Q Value 47.714 - Time Delta 65.699 - Time 2023-11-04T03:14:26


 55%|█████▌    | 22080/40000 [16:54:53<10:02:38,  2.02s/it]

Episode 22080 - Step 4225286 - Epsilon 0.34773254894863237 - Mean Reward 873.6 - Mean Length 188.61 - Mean Loss 1.98 - Mean Q Value 47.619 - Time Delta 50.881 - Time 2023-11-04T03:15:17


 55%|█████▌    | 22100/40000 [16:56:11<16:17:21,  3.28s/it]

Episode 22100 - Step 4230338 - Epsilon 0.34729363991378553 - Mean Reward 910.79 - Mean Length 206.29 - Mean Loss 1.949 - Mean Q Value 47.659 - Time Delta 75.753 - Time 2023-11-04T03:16:33


 55%|█████▌    | 22101/40000 [16:56:15<17:02:11,  3.43s/it]

MarioNet saved to checkpoints2/mario_net_8.chkpt at step 4230338


 55%|█████▌    | 22120/40000 [16:57:05<12:19:26,  2.48s/it]

Episode 22120 - Step 4233802 - Epsilon 0.34699301377390684 - Mean Reward 929.35 - Mean Length 208.99 - Mean Loss 1.918 - Mean Q Value 47.607 - Time Delta 52.176 - Time 2023-11-04T03:17:25


 55%|█████▌    | 22140/40000 [16:57:57<13:54:13,  2.80s/it]

Episode 22140 - Step 4237339 - Epsilon 0.3466863207797221 - Mean Reward 913.18 - Mean Length 197.52 - Mean Loss 1.922 - Mean Q Value 47.565 - Time Delta 53.126 - Time 2023-11-04T03:18:18


 55%|█████▌    | 22160/40000 [16:58:52<13:45:53,  2.78s/it]

Episode 22160 - Step 4240871 - Epsilon 0.3463803318341842 - Mean Reward 892.38 - Mean Length 189.43 - Mean Loss 1.942 - Mean Q Value 47.565 - Time Delta 52.967 - Time 2023-11-04T03:19:11


 55%|█████▌    | 22180/40000 [16:59:56<14:37:09,  2.95s/it]

Episode 22180 - Step 4245327 - Epsilon 0.34599467894480157 - Mean Reward 931.87 - Mean Length 200.41 - Mean Loss 1.944 - Mean Q Value 47.624 - Time Delta 66.58 - Time 2023-11-04T03:20:18


 56%|█████▌    | 22200/40000 [17:00:59<14:01:45,  2.84s/it]

Episode 22200 - Step 4249586 - Epsilon 0.3456264771205502 - Mean Reward 889.87 - Mean Length 192.48 - Mean Loss 1.944 - Mean Q Value 47.626 - Time Delta 63.415 - Time 2023-11-04T03:21:21


 56%|█████▌    | 22201/40000 [17:01:03<15:12:02,  3.07s/it]

MarioNet saved to checkpoints2/mario_net_8.chkpt at step 4249586


 56%|█████▌    | 22220/40000 [17:02:19<16:05:21,  3.26s/it]

Episode 22220 - Step 4255138 - Epsilon 0.34514708028868113 - Mean Reward 932.87 - Mean Length 213.36 - Mean Loss 1.921 - Mean Q Value 47.575 - Time Delta 81.127 - Time 2023-11-04T03:22:42


 56%|█████▌    | 22240/40000 [17:03:16<15:12:13,  3.08s/it]

Episode 22240 - Step 4259010 - Epsilon 0.34481313952657794 - Mean Reward 927.79 - Mean Length 216.71 - Mean Loss 1.904 - Mean Q Value 47.521 - Time Delta 56.66 - Time 2023-11-04T03:23:39


 56%|█████▌    | 22260/40000 [17:04:13<11:52:49,  2.41s/it]

Episode 22260 - Step 4262792 - Epsilon 0.3444872727401757 - Mean Reward 941.72 - Mean Length 219.21 - Mean Loss 1.905 - Mean Q Value 47.497 - Time Delta 55.863 - Time 2023-11-04T03:24:35


 56%|█████▌    | 22280/40000 [17:05:16<14:04:09,  2.86s/it]

Episode 22280 - Step 4266981 - Epsilon 0.3441266972379792 - Mean Reward 943.68 - Mean Length 216.54 - Mean Loss 1.896 - Mean Q Value 47.385 - Time Delta 62.005 - Time 2023-11-04T03:25:37


 56%|█████▌    | 22300/40000 [17:06:02<8:31:00,  1.73s/it] 

Episode 22300 - Step 4270194 - Epsilon 0.3438503884212307 - Mean Reward 935.9 - Mean Length 206.08 - Mean Loss 1.879 - Mean Q Value 47.299 - Time Delta 47.971 - Time 2023-11-04T03:26:25


 56%|█████▌    | 22301/40000 [17:06:07<13:13:54,  2.69s/it]

MarioNet saved to checkpoints2/mario_net_8.chkpt at step 4270194


 56%|█████▌    | 22320/40000 [17:06:56<7:11:21,  1.46s/it]

Episode 22320 - Step 4274014 - Epsilon 0.34352216800940716 - Mean Reward 899.9 - Mean Length 188.76 - Mean Loss 1.871 - Mean Q Value 47.279 - Time Delta 55.815 - Time 2023-11-04T03:27:21


 56%|█████▌    | 22340/40000 [17:08:00<15:12:06,  3.10s/it]

Episode 22340 - Step 4278224 - Epsilon 0.34316080108505925 - Mean Reward 921.53 - Mean Length 192.14 - Mean Loss 1.853 - Mean Q Value 47.29 - Time Delta 61.519 - Time 2023-11-04T03:28:22


 56%|█████▌    | 22360/40000 [17:08:54<13:46:52,  2.81s/it]

Episode 22360 - Step 4281938 - Epsilon 0.34284232411721116 - Mean Reward 908.24 - Mean Length 191.46 - Mean Loss 1.838 - Mean Q Value 47.253 - Time Delta 54.732 - Time 2023-11-04T03:29:17


 56%|█████▌    | 22380/40000 [17:09:35<7:49:03,  1.60s/it] 

Episode 22380 - Step 4284709 - Epsilon 0.34260490231394153 - Mean Reward 820.14 - Mean Length 177.28 - Mean Loss 1.823 - Mean Q Value 47.237 - Time Delta 40.915 - Time 2023-11-04T03:29:58


 56%|█████▌    | 22400/40000 [17:10:31<14:13:21,  2.91s/it]

Episode 22400 - Step 4288407 - Epsilon 0.34228831040929136 - Mean Reward 849.24 - Mean Length 182.13 - Mean Loss 1.833 - Mean Q Value 47.162 - Time Delta 54.118 - Time 2023-11-04T03:30:52


 56%|█████▌    | 22401/40000 [17:10:34<14:18:30,  2.93s/it]

MarioNet saved to checkpoints2/mario_net_8.chkpt at step 4288407


 56%|█████▌    | 22420/40000 [17:11:19<13:10:00,  2.70s/it]

Episode 22420 - Step 4291813 - Epsilon 0.34199697592977796 - Mean Reward 858.61 - Mean Length 177.99 - Mean Loss 1.828 - Mean Q Value 47.243 - Time Delta 50.078 - Time 2023-11-04T03:31:42


 56%|█████▌    | 22440/40000 [17:12:20<12:33:49,  2.58s/it]

Episode 22440 - Step 4295855 - Epsilon 0.34165156249190887 - Mean Reward 846.98 - Mean Length 176.31 - Mean Loss 1.836 - Mean Q Value 47.213 - Time Delta 59.439 - Time 2023-11-04T03:32:41


 56%|█████▌    | 22460/40000 [17:13:20<14:26:05,  2.96s/it]

Episode 22460 - Step 4300256 - Epsilon 0.3412758670306277 - Mean Reward 815.62 - Mean Length 183.18 - Mean Loss 1.848 - Mean Q Value 47.191 - Time Delta 62.137 - Time 2023-11-04T03:33:44


 56%|█████▌    | 22480/40000 [17:14:16<17:00:47,  3.50s/it]

Episode 22480 - Step 4304095 - Epsilon 0.3409484846040462 - Mean Reward 870.29 - Mean Length 193.86 - Mean Loss 1.879 - Mean Q Value 46.972 - Time Delta 54.542 - Time 2023-11-04T03:34:38


 56%|█████▋    | 22500/40000 [17:15:09<11:39:35,  2.40s/it]

Episode 22500 - Step 4307665 - Epsilon 0.34064432379542553 - Mean Reward 823.87 - Mean Length 192.58 - Mean Loss 1.903 - Mean Q Value 46.779 - Time Delta 50.202 - Time 2023-11-04T03:35:28


 56%|█████▋    | 22501/40000 [17:15:10<9:40:30,  1.99s/it] 

MarioNet saved to checkpoints2/mario_net_8.chkpt at step 4307665


 56%|█████▋    | 22520/40000 [17:15:57<10:05:08,  2.08s/it]

Episode 22520 - Step 4311285 - Epsilon 0.3403361800998835 - Mean Reward 824.49 - Mean Length 194.72 - Mean Loss 1.917 - Mean Q Value 46.491 - Time Delta 50.686 - Time 2023-11-04T03:36:19


 56%|█████▋    | 22540/40000 [17:16:45<11:14:51,  2.32s/it]

Episode 22540 - Step 4314571 - Epsilon 0.3400567087015536 - Mean Reward 826.24 - Mean Length 187.16 - Mean Loss 1.917 - Mean Q Value 46.264 - Time Delta 46.618 - Time 2023-11-04T03:37:06


 56%|█████▋    | 22560/40000 [17:17:33<11:47:36,  2.43s/it]

Episode 22560 - Step 4317872 - Epsilon 0.33977619263145703 - Mean Reward 829.79 - Mean Length 176.16 - Mean Loss 1.908 - Mean Q Value 46.097 - Time Delta 46.994 - Time 2023-11-04T03:37:53


 56%|█████▋    | 22580/40000 [17:18:21<15:06:05,  3.12s/it]

Episode 22580 - Step 4321149 - Epsilon 0.33949794494359553 - Mean Reward 816.51 - Mean Length 170.54 - Mean Loss 1.91 - Mean Q Value 46.237 - Time Delta 47.403 - Time 2023-11-04T03:38:40


 56%|█████▋    | 22600/40000 [17:19:09<15:25:14,  3.19s/it]

Episode 22600 - Step 4325232 - Epsilon 0.33915157917945243 - Mean Reward 861.8 - Mean Length 175.67 - Mean Loss 1.904 - Mean Q Value 46.373 - Time Delta 58.429 - Time 2023-11-04T03:39:38


 57%|█████▋    | 22601/40000 [17:19:20<26:35:41,  5.50s/it]

MarioNet saved to checkpoints2/mario_net_8.chkpt at step 4325232


 57%|█████▋    | 22620/40000 [17:20:09<9:47:45,  2.03s/it] 

Episode 22620 - Step 4328757 - Epsilon 0.33885283346707873 - Mean Reward 822.95 - Mean Length 174.72 - Mean Loss 1.923 - Mean Q Value 46.424 - Time Delta 50.111 - Time 2023-11-04T03:40:29


 57%|█████▋    | 22640/40000 [17:20:54<10:11:01,  2.11s/it]

Episode 22640 - Step 4331981 - Epsilon 0.33857982808508874 - Mean Reward 777.95 - Mean Length 174.1 - Mean Loss 1.944 - Mean Q Value 46.437 - Time Delta 45.298 - Time 2023-11-04T03:41:14


 57%|█████▋    | 22660/40000 [17:21:52<14:58:58,  3.11s/it]

Episode 22660 - Step 4336057 - Epsilon 0.3382349909214912 - Mean Reward 786.79 - Mean Length 181.85 - Mean Loss 1.959 - Mean Q Value 46.462 - Time Delta 57.456 - Time 2023-11-04T03:42:11


 57%|█████▋    | 22680/40000 [17:22:47<11:41:07,  2.43s/it]

Episode 22680 - Step 4340129 - Epsilon 0.33789084285875254 - Mean Reward 794.88 - Mean Length 189.8 - Mean Loss 1.958 - Mean Q Value 46.338 - Time Delta 57.605 - Time 2023-11-04T03:43:09


 57%|█████▋    | 22700/40000 [17:23:40<9:45:33,  2.03s/it] 

Episode 22700 - Step 4344125 - Epsilon 0.3375534584161053 - Mean Reward 780.06 - Mean Length 188.93 - Mean Loss 1.966 - Mean Q Value 46.155 - Time Delta 56.983 - Time 2023-11-04T03:44:06


 57%|█████▋    | 22701/40000 [17:23:48<17:54:11,  3.73s/it]

MarioNet saved to checkpoints2/mario_net_8.chkpt at step 4344125


 57%|█████▋    | 22720/40000 [17:25:02<16:00:54,  3.34s/it]

Episode 22720 - Step 4349653 - Epsilon 0.33708728168055796 - Mean Reward 849.94 - Mean Length 208.96 - Mean Loss 1.962 - Mean Q Value 46.002 - Time Delta 78.993 - Time 2023-11-04T03:45:25


 57%|█████▋    | 22740/40000 [17:26:02<13:00:35,  2.71s/it]

Episode 22740 - Step 4353688 - Epsilon 0.33674741629105714 - Mean Reward 904.27 - Mean Length 217.07 - Mean Loss 1.952 - Mean Q Value 45.926 - Time Delta 57.393 - Time 2023-11-04T03:46:22


 57%|█████▋    | 22760/40000 [17:26:59<14:31:02,  3.03s/it]

Episode 22760 - Step 4357844 - Epsilon 0.3363977173818117 - Mean Reward 937.23 - Mean Length 217.87 - Mean Loss 1.947 - Mean Q Value 45.848 - Time Delta 59.803 - Time 2023-11-04T03:47:22


 57%|█████▋    | 22780/40000 [17:27:56<16:38:51,  3.48s/it]

Episode 22780 - Step 4361509 - Epsilon 0.3360896340969159 - Mean Reward 932.76 - Mean Length 213.8 - Mean Loss 1.941 - Mean Q Value 45.915 - Time Delta 52.759 - Time 2023-11-04T03:48:15


 57%|█████▋    | 22800/40000 [17:28:55<18:53:22,  3.95s/it]

Episode 22800 - Step 4365829 - Epsilon 0.3357268531836483 - Mean Reward 894.45 - Mean Length 217.04 - Mean Loss 1.943 - Mean Q Value 45.988 - Time Delta 60.744 - Time 2023-11-04T03:49:16


 57%|█████▋    | 22801/40000 [17:28:57<15:52:21,  3.32s/it]

MarioNet saved to checkpoints2/mario_net_8.chkpt at step 4365829


 57%|█████▋    | 22820/40000 [17:30:09<9:07:45,  1.91s/it]

Episode 22820 - Step 4371196 - Epsilon 0.3352766936903413 - Mean Reward 824.17 - Mean Length 215.43 - Mean Loss 1.966 - Mean Q Value 45.848 - Time Delta 75.852 - Time 2023-11-04T03:50:31


 57%|█████▋    | 22840/40000 [17:30:51<12:08:48,  2.55s/it]

Episode 22840 - Step 4373916 - Epsilon 0.3350487830085249 - Mean Reward 781.17 - Mean Length 202.28 - Mean Loss 1.973 - Mean Q Value 45.573 - Time Delta 38.721 - Time 2023-11-04T03:51:10


 57%|█████▋    | 22860/40000 [17:31:59<15:30:15,  3.26s/it]

Episode 22860 - Step 4378995 - Epsilon 0.33462362474257146 - Mean Reward 775.58 - Mean Length 211.51 - Mean Loss 1.948 - Mean Q Value 45.337 - Time Delta 71.232 - Time 2023-11-04T03:52:21


 57%|█████▋    | 22880/40000 [17:32:48<12:34:55,  2.65s/it]

Episode 22880 - Step 4382330 - Epsilon 0.33434474853321833 - Mean Reward 780.67 - Mean Length 208.21 - Mean Loss 1.96 - Mean Q Value 44.844 - Time Delta 47.583 - Time 2023-11-04T03:53:09


 57%|█████▋    | 22900/40000 [17:33:36<9:52:57,  2.08s/it]

Episode 22900 - Step 4385781 - Epsilon 0.33405641696225524 - Mean Reward 827.11 - Mean Length 199.52 - Mean Loss 1.91 - Mean Q Value 44.389 - Time Delta 48.999 - Time 2023-11-04T03:53:58


 57%|█████▋    | 22901/40000 [17:33:40<11:45:29,  2.48s/it]

MarioNet saved to checkpoints2/mario_net_8.chkpt at step 4385781


 57%|█████▋    | 22920/40000 [17:34:44<15:12:18,  3.20s/it]

Episode 22920 - Step 4390411 - Epsilon 0.333669970310409 - Mean Reward 881.67 - Mean Length 192.15 - Mean Loss 1.848 - Mean Q Value 44.203 - Time Delta 67.206 - Time 2023-11-04T03:55:05


 57%|█████▋    | 22940/40000 [17:35:48<21:16:18,  4.49s/it]

Episode 22940 - Step 4395039 - Epsilon 0.3332841373538834 - Mean Reward 929.67 - Mean Length 211.23 - Mean Loss 1.815 - Mean Q Value 44.01 - Time Delta 65.95 - Time 2023-11-04T03:56:11


 57%|█████▋    | 22960/40000 [17:36:43<12:26:41,  2.63s/it]

Episode 22960 - Step 4398822 - Epsilon 0.3329690828464398 - Mean Reward 935.75 - Mean Length 198.27 - Mean Loss 1.809 - Mean Q Value 43.797 - Time Delta 53.619 - Time 2023-11-04T03:57:05


 57%|█████▋    | 22980/40000 [17:37:34<10:48:25,  2.29s/it]

Episode 22980 - Step 4402440 - Epsilon 0.33266804843666903 - Mean Reward 954.22 - Mean Length 201.1 - Mean Loss 1.776 - Mean Q Value 43.782 - Time Delta 52.497 - Time 2023-11-04T03:57:57


 57%|█████▊    | 23000/40000 [17:38:31<13:58:37,  2.96s/it]

Episode 23000 - Step 4406346 - Epsilon 0.33234335660332814 - Mean Reward 945.78 - Mean Length 205.65 - Mean Loss 1.772 - Mean Q Value 43.793 - Time Delta 56.305 - Time 2023-11-04T03:58:54


 58%|█████▊    | 23001/40000 [17:38:35<15:18:44,  3.24s/it]

MarioNet saved to checkpoints2/mario_net_8.chkpt at step 4406346


 58%|█████▊    | 23020/40000 [17:39:21<15:53:24,  3.37s/it]

Episode 23020 - Step 4410254 - Epsilon 0.33201881566737695 - Mean Reward 905.92 - Mean Length 198.43 - Mean Loss 1.779 - Mean Q Value 43.868 - Time Delta 55.363 - Time 2023-11-04T03:59:49


 58%|█████▊    | 23040/40000 [17:40:20<15:56:21,  3.38s/it]

Episode 23040 - Step 4413804 - Epsilon 0.33172427965130324 - Mean Reward 886.74 - Mean Length 187.65 - Mean Loss 1.761 - Mean Q Value 44.01 - Time Delta 52.009 - Time 2023-11-04T04:00:41


 58%|█████▊    | 23060/40000 [17:41:00<9:57:40,  2.12s/it] 

Episode 23060 - Step 4416628 - Epsilon 0.33149016493279276 - Mean Reward 821.36 - Mean Length 178.06 - Mean Loss 1.753 - Mean Q Value 44.033 - Time Delta 40.738 - Time 2023-11-04T04:01:22


 58%|█████▊    | 23080/40000 [17:41:47<10:57:03,  2.33s/it]

Episode 23080 - Step 4419798 - Epsilon 0.331227563013974 - Mean Reward 778.01 - Mean Length 173.58 - Mean Loss 1.735 - Mean Q Value 43.982 - Time Delta 45.205 - Time 2023-11-04T04:02:07


 58%|█████▊    | 23100/40000 [17:42:39<13:38:37,  2.91s/it]

Episode 23100 - Step 4423480 - Epsilon 0.3309228082889639 - Mean Reward 780.02 - Mean Length 171.34 - Mean Loss 1.755 - Mean Q Value 43.931 - Time Delta 52.62 - Time 2023-11-04T04:03:00


 58%|█████▊    | 23101/40000 [17:42:41<12:18:53,  2.62s/it]

MarioNet saved to checkpoints2/mario_net_8.chkpt at step 4423480


 58%|█████▊    | 23120/40000 [17:43:26<10:16:03,  2.19s/it]

Episode 23120 - Step 4426806 - Epsilon 0.33064776030649684 - Mean Reward 765.36 - Mean Length 165.52 - Mean Loss 1.755 - Mean Q Value 43.772 - Time Delta 47.713 - Time 2023-11-04T04:03:47


 58%|█████▊    | 23140/40000 [17:44:25<11:25:53,  2.44s/it]

Episode 23140 - Step 4430836 - Epsilon 0.3303148004030383 - Mean Reward 767.85 - Mean Length 170.32 - Mean Loss 1.779 - Mean Q Value 43.71 - Time Delta 59.057 - Time 2023-11-04T04:04:46


 58%|█████▊    | 23160/40000 [17:45:17<13:14:28,  2.83s/it]

Episode 23160 - Step 4434304 - Epsilon 0.3300285415464126 - Mean Reward 800.77 - Mean Length 176.76 - Mean Loss 1.794 - Mean Q Value 43.828 - Time Delta 50.271 - Time 2023-11-04T04:05:37


 58%|█████▊    | 23180/40000 [17:46:15<13:06:29,  2.81s/it]

Episode 23180 - Step 4438769 - Epsilon 0.32966035267448124 - Mean Reward 820.02 - Mean Length 189.71 - Mean Loss 1.809 - Mean Q Value 43.955 - Time Delta 63.606 - Time 2023-11-04T04:06:40


 58%|█████▊    | 23200/40000 [17:47:15<9:26:57,  2.02s/it] 

Episode 23200 - Step 4442736 - Epsilon 0.3293335740469576 - Mean Reward 830.61 - Mean Length 192.56 - Mean Loss 1.803 - Mean Q Value 43.997 - Time Delta 56.712 - Time 2023-11-04T04:07:37


 58%|█████▊    | 23201/40000 [17:47:19<11:26:29,  2.45s/it]

MarioNet saved to checkpoints2/mario_net_8.chkpt at step 4442736


 58%|█████▊    | 23220/40000 [17:48:19<18:29:58,  3.97s/it]

Episode 23220 - Step 4447303 - Epsilon 0.32895777196859716 - Mean Reward 904.62 - Mean Length 204.97 - Mean Loss 1.812 - Mean Q Value 44.1 - Time Delta 66.366 - Time 2023-11-04T04:08:43


 58%|█████▊    | 23240/40000 [17:49:19<13:03:20,  2.80s/it]

Episode 23240 - Step 4451246 - Epsilon 0.3286336615764749 - Mean Reward 925.23 - Mean Length 204.1 - Mean Loss 1.794 - Mean Q Value 44.264 - Time Delta 56.816 - Time 2023-11-04T04:09:40


 58%|█████▊    | 23260/40000 [17:50:13<9:31:24,  2.05s/it] 

Episode 23260 - Step 4454947 - Epsilon 0.32832973386930303 - Mean Reward 954.88 - Mean Length 206.43 - Mean Loss 1.785 - Mean Q Value 44.306 - Time Delta 53.448 - Time 2023-11-04T04:10:34


 58%|█████▊    | 23280/40000 [17:51:15<13:11:25,  2.84s/it]

Episode 23280 - Step 4459272 - Epsilon 0.32797491915642824 - Mean Reward 987.13 - Mean Length 205.03 - Mean Loss 1.78 - Mean Q Value 44.422 - Time Delta 62.502 - Time 2023-11-04T04:11:36


 58%|█████▊    | 23300/40000 [17:52:14<13:23:37,  2.89s/it]

Episode 23300 - Step 4463273 - Epsilon 0.32764702621730374 - Mean Reward 978.46 - Mean Length 205.37 - Mean Loss 1.777 - Mean Q Value 44.529 - Time Delta 57.622 - Time 2023-11-04T04:12:34


 58%|█████▊    | 23301/40000 [17:52:16<11:10:23,  2.41s/it]

MarioNet saved to checkpoints2/mario_net_8.chkpt at step 4463273


 58%|█████▊    | 23320/40000 [17:53:10<13:54:26,  3.00s/it]

Episode 23320 - Step 4467668 - Epsilon 0.3272872217060231 - Mean Reward 965.23 - Mean Length 203.65 - Mean Loss 1.777 - Mean Q Value 44.759 - Time Delta 62.798 - Time 2023-11-04T04:13:36


 58%|█████▊    | 23340/40000 [17:54:21<13:35:51,  2.94s/it]

Episode 23340 - Step 4472347 - Epsilon 0.32690460125931425 - Mean Reward 973.99 - Mean Length 211.01 - Mean Loss 1.785 - Mean Q Value 44.987 - Time Delta 67.028 - Time 2023-11-04T04:14:43


 58%|█████▊    | 23360/40000 [17:55:17<10:50:59,  2.35s/it]

Episode 23360 - Step 4476112 - Epsilon 0.3265970470303068 - Mean Reward 949.93 - Mean Length 211.65 - Mean Loss 1.812 - Mean Q Value 45.21 - Time Delta 53.41 - Time 2023-11-04T04:15:37


 58%|█████▊    | 23380/40000 [17:56:12<14:50:19,  3.21s/it]

Episode 23380 - Step 4480084 - Epsilon 0.3262728970891224 - Mean Reward 957.14 - Mean Length 208.12 - Mean Loss 1.828 - Mean Q Value 45.458 - Time Delta 56.62 - Time 2023-11-04T04:16:34


 58%|█████▊    | 23400/40000 [17:57:20<12:46:57,  2.77s/it]

Episode 23400 - Step 4484842 - Epsilon 0.3258850261614655 - Mean Reward 985.03 - Mean Length 215.69 - Mean Loss 1.846 - Mean Q Value 45.782 - Time Delta 67.833 - Time 2023-11-04T04:17:41


 59%|█████▊    | 23401/40000 [17:57:23<12:52:41,  2.79s/it]

MarioNet saved to checkpoints2/mario_net_8.chkpt at step 4484842


 59%|█████▊    | 23420/40000 [17:58:43<20:14:21,  4.39s/it]

Episode 23420 - Step 4490405 - Epsilon 0.3254321165196967 - Mean Reward 976.48 - Mean Length 227.37 - Mean Loss 1.848 - Mean Q Value 45.884 - Time Delta 82.87 - Time 2023-11-04T04:19:04


 59%|█████▊    | 23440/40000 [17:59:43<14:39:04,  3.19s/it]

Episode 23440 - Step 4494538 - Epsilon 0.32509603740002174 - Mean Reward 976.43 - Mean Length 221.91 - Mean Loss 1.866 - Mean Q Value 45.769 - Time Delta 59.209 - Time 2023-11-04T04:20:03


 59%|█████▊    | 23460/40000 [18:00:48<11:45:11,  2.56s/it]

Episode 23460 - Step 4499161 - Epsilon 0.3247205246490326 - Mean Reward 979.16 - Mean Length 230.49 - Mean Loss 1.871 - Mean Q Value 45.701 - Time Delta 66.746 - Time 2023-11-04T04:21:10


 59%|█████▊    | 23464/40000 [18:01:04<17:04:05,  3.72s/it]

MarioNet saved to checkpoints2/mario_net_9.chkpt at step 4500000


 59%|█████▊    | 23480/40000 [18:01:49<13:49:39,  3.01s/it]

Episode 23480 - Step 4503300 - Epsilon 0.32438469382456336 - Mean Reward 983.31 - Mean Length 232.16 - Mean Loss 1.903 - Mean Q Value 45.432 - Time Delta 60.273 - Time 2023-11-04T04:22:10


 59%|█████▉    | 23500/40000 [18:02:38<13:30:03,  2.95s/it]

Episode 23500 - Step 4506780 - Epsilon 0.32410260183344397 - Mean Reward 957.95 - Mean Length 219.38 - Mean Loss 1.917 - Mean Q Value 45.242 - Time Delta 49.95 - Time 2023-11-04T04:23:00


 59%|█████▉    | 23501/40000 [18:02:42<14:54:20,  3.25s/it]

MarioNet saved to checkpoints2/mario_net_9.chkpt at step 4506780


 59%|█████▉    | 23520/40000 [18:03:38<16:29:26,  3.60s/it]

Episode 23520 - Step 4510912 - Epsilon 0.32376797666753 - Mean Reward 949.48 - Mean Length 205.07 - Mean Loss 1.931 - Mean Q Value 45.122 - Time Delta 59.486 - Time 2023-11-04T04:24:00


 59%|█████▉    | 23540/40000 [18:04:32<11:25:10,  2.50s/it]

Episode 23540 - Step 4514650 - Epsilon 0.32346555678318856 - Mean Reward 931.67 - Mean Length 201.12 - Mean Loss 1.939 - Mean Q Value 45.154 - Time Delta 53.637 - Time 2023-11-04T04:24:54


 59%|█████▉    | 23560/40000 [18:05:32<13:38:14,  2.99s/it]

Episode 23560 - Step 4518879 - Epsilon 0.32312375349810807 - Mean Reward 952.81 - Mean Length 197.18 - Mean Loss 1.919 - Mean Q Value 45.211 - Time Delta 60.349 - Time 2023-11-04T04:25:54


 59%|█████▉    | 23580/40000 [18:07:09<9:57:00,  2.18s/it] 

Episode 23580 - Step 4525827 - Epsilon 0.3225629746455373 - Mean Reward 939.24 - Mean Length 225.27 - Mean Loss 1.892 - Mean Q Value 45.153 - Time Delta 98.398 - Time 2023-11-04T04:27:32


 59%|█████▉    | 23600/40000 [18:07:57<6:23:56,  1.40s/it]

Episode 23600 - Step 4528936 - Epsilon 0.32231235994988233 - Mean Reward 907.66 - Mean Length 221.56 - Mean Loss 1.872 - Mean Q Value 45.054 - Time Delta 44.464 - Time 2023-11-04T04:28:17


 59%|█████▉    | 23601/40000 [18:07:59<6:05:00,  1.34s/it]

MarioNet saved to checkpoints2/mario_net_9.chkpt at step 4528936


 59%|█████▉    | 23620/40000 [18:08:47<12:43:33,  2.80s/it]

Episode 23620 - Step 4532518 - Epsilon 0.32202385839129655 - Mean Reward 888.47 - Mean Length 216.06 - Mean Loss 1.868 - Mean Q Value 44.94 - Time Delta 51.526 - Time 2023-11-04T04:29:08


 59%|█████▉    | 23640/40000 [18:09:37<7:09:34,  1.58s/it]

Episode 23640 - Step 4535936 - Epsilon 0.3217488065025381 - Mean Reward 875.63 - Mean Length 212.86 - Mean Loss 1.829 - Mean Q Value 44.824 - Time Delta 49.652 - Time 2023-11-04T04:29:58


 59%|█████▉    | 23660/40000 [18:10:38<9:19:00,  2.05s/it] 

Episode 23660 - Step 4540134 - Epsilon 0.32141130822107733 - Mean Reward 862.35 - Mean Length 212.55 - Mean Loss 1.822 - Mean Q Value 44.672 - Time Delta 60.45 - Time 2023-11-04T04:30:58


 59%|█████▉    | 23680/40000 [18:11:35<11:52:43,  2.62s/it]

Episode 23680 - Step 4544212 - Epsilon 0.32108379632891554 - Mean Reward 851.86 - Mean Length 183.85 - Mean Loss 1.811 - Mean Q Value 44.787 - Time Delta 59.162 - Time 2023-11-04T04:31:58


 59%|█████▉    | 23700/40000 [18:12:45<10:02:29,  2.22s/it]

Episode 23700 - Step 4548978 - Epsilon 0.32070145276415646 - Mean Reward 917.31 - Mean Length 200.42 - Mean Loss 1.788 - Mean Q Value 44.864 - Time Delta 68.888 - Time 2023-11-04T04:33:06


 59%|█████▉    | 23701/40000 [18:12:48<10:50:13,  2.39s/it]

MarioNet saved to checkpoints2/mario_net_9.chkpt at step 4548978


 59%|█████▉    | 23720/40000 [18:13:57<22:12:56,  4.91s/it]

Episode 23720 - Step 4554028 - Epsilon 0.32029682260593895 - Mean Reward 953.65 - Mean Length 215.1 - Mean Loss 1.772 - Mean Q Value 44.921 - Time Delta 72.311 - Time 2023-11-04T04:34:19


 59%|█████▉    | 23740/40000 [18:14:52<11:50:53,  2.62s/it]

Episode 23740 - Step 4557801 - Epsilon 0.3199948450327892 - Mean Reward 961.38 - Mean Length 218.65 - Mean Loss 1.783 - Mean Q Value 44.855 - Time Delta 54.623 - Time 2023-11-04T04:35:13


 59%|█████▉    | 23760/40000 [18:15:56<12:35:34,  2.79s/it]

Episode 23760 - Step 4562364 - Epsilon 0.3196300189948766 - Mean Reward 1002.05 - Mean Length 222.3 - Mean Loss 1.781 - Mean Q Value 44.85 - Time Delta 65.643 - Time 2023-11-04T04:36:19


 59%|█████▉    | 23780/40000 [18:16:55<14:15:04,  3.16s/it]

Episode 23780 - Step 4566235 - Epsilon 0.3193208466302115 - Mean Reward 1004.01 - Mean Length 220.23 - Mean Loss 1.773 - Mean Q Value 44.802 - Time Delta 55.469 - Time 2023-11-04T04:37:14


 60%|█████▉    | 23800/40000 [18:18:05<14:13:31,  3.16s/it]

Episode 23800 - Step 4571291 - Epsilon 0.31891748001100095 - Mean Reward 964.06 - Mean Length 223.13 - Mean Loss 1.783 - Mean Q Value 44.742 - Time Delta 72.307 - Time 2023-11-04T04:38:27


 60%|█████▉    | 23801/40000 [18:18:08<14:02:14,  3.12s/it]

MarioNet saved to checkpoints2/mario_net_9.chkpt at step 4571291


 60%|█████▉    | 23820/40000 [18:18:56<12:54:27,  2.87s/it]

Episode 23820 - Step 4574875 - Epsilon 0.31863185789098514 - Mean Reward 946.01 - Mean Length 208.47 - Mean Loss 1.79 - Mean Q Value 44.689 - Time Delta 51.509 - Time 2023-11-04T04:39:18


 60%|█████▉    | 23840/40000 [18:19:49<9:05:11,  2.02s/it] 

Episode 23840 - Step 4578454 - Epsilon 0.31834688950721923 - Mean Reward 954.4 - Mean Length 206.53 - Mean Loss 1.786 - Mean Q Value 44.688 - Time Delta 51.499 - Time 2023-11-04T04:40:10


 60%|█████▉    | 23860/40000 [18:20:31<7:34:28,  1.69s/it]

Episode 23860 - Step 4581437 - Epsilon 0.31810957078588775 - Mean Reward 893.76 - Mean Length 190.73 - Mean Loss 1.8 - Mean Q Value 44.657 - Time Delta 42.9 - Time 2023-11-04T04:40:53


 60%|█████▉    | 23880/40000 [18:21:19<10:34:21,  2.36s/it]

Episode 23880 - Step 4584763 - Epsilon 0.3178451725834789 - Mean Reward 864.34 - Mean Length 185.28 - Mean Loss 1.83 - Mean Q Value 44.729 - Time Delta 47.884 - Time 2023-11-04T04:41:41


 60%|█████▉    | 23900/40000 [18:22:18<15:43:25,  3.52s/it]

Episode 23900 - Step 4589039 - Epsilon 0.3175055775973505 - Mean Reward 903.28 - Mean Length 177.48 - Mean Loss 1.837 - Mean Q Value 44.835 - Time Delta 61.547 - Time 2023-11-04T04:42:42


 60%|█████▉    | 23901/40000 [18:22:24<18:40:02,  4.17s/it]

MarioNet saved to checkpoints2/mario_net_9.chkpt at step 4589039


 60%|█████▉    | 23920/40000 [18:23:21<14:19:53,  3.21s/it]

Episode 23920 - Step 4593249 - Epsilon 0.3171715787328671 - Mean Reward 915.65 - Mean Length 183.74 - Mean Loss 1.844 - Mean Q Value 45.122 - Time Delta 60.917 - Time 2023-11-04T04:43:43


 60%|█████▉    | 23940/40000 [18:24:16<8:48:01,  1.97s/it] 

Episode 23940 - Step 4596945 - Epsilon 0.3168786475125268 - Mean Reward 886.8 - Mean Length 184.91 - Mean Loss 1.874 - Mean Q Value 45.347 - Time Delta 54.323 - Time 2023-11-04T04:44:37


 60%|█████▉    | 23960/40000 [18:25:25<23:12:52,  5.21s/it]

Episode 23960 - Step 4601971 - Epsilon 0.3164807394802045 - Mean Reward 901.13 - Mean Length 205.34 - Mean Loss 1.873 - Mean Q Value 45.611 - Time Delta 72.36 - Time 2023-11-04T04:45:50


 60%|█████▉    | 23980/40000 [18:26:22<11:00:26,  2.47s/it]

Episode 23980 - Step 4605652 - Epsilon 0.3161896320096378 - Mean Reward 926.29 - Mean Length 208.89 - Mean Loss 1.857 - Mean Q Value 45.58 - Time Delta 53.177 - Time 2023-11-04T04:46:43


 60%|██████    | 24000/40000 [18:27:08<9:39:22,  2.17s/it] 

Episode 24000 - Step 4608955 - Epsilon 0.31592864615771027 - Mean Reward 884.26 - Mean Length 199.16 - Mean Loss 1.858 - Mean Q Value 45.533 - Time Delta 47.851 - Time 2023-11-04T04:47:31


 60%|██████    | 24001/40000 [18:27:13<13:23:17,  3.01s/it]

MarioNet saved to checkpoints2/mario_net_9.chkpt at step 4608955


 60%|██████    | 24020/40000 [18:27:54<11:36:25,  2.61s/it]

Episode 24020 - Step 4611967 - Epsilon 0.315690841402121 - Mean Reward 843.18 - Mean Length 187.18 - Mean Loss 1.87 - Mean Q Value 45.433 - Time Delta 44.873 - Time 2023-11-04T04:48:16


 60%|██████    | 24040/40000 [18:29:19<18:15:50,  4.12s/it]

Episode 24040 - Step 4617582 - Epsilon 0.3152480012192588 - Mean Reward 855.98 - Mean Length 206.37 - Mean Loss 1.844 - Mean Q Value 45.343 - Time Delta 82.452 - Time 2023-11-04T04:49:38


 60%|██████    | 24060/40000 [18:30:15<9:57:33,  2.25s/it]

Episode 24060 - Step 4621670 - Epsilon 0.31492598230141144 - Mean Reward 898.89 - Mean Length 196.99 - Mean Loss 1.824 - Mean Q Value 45.227 - Time Delta 60.668 - Time 2023-11-04T04:50:39


 60%|██████    | 24080/40000 [18:31:23<12:39:19,  2.86s/it]

Episode 24080 - Step 4626136 - Epsilon 0.314574563614184 - Mean Reward 926.73 - Mean Length 204.84 - Mean Loss 1.803 - Mean Q Value 45.417 - Time Delta 65.726 - Time 2023-11-04T04:51:44


 60%|██████    | 24100/40000 [18:32:26<11:44:57,  2.66s/it]

Episode 24100 - Step 4630315 - Epsilon 0.314246083417461 - Mean Reward 965.12 - Mean Length 213.6 - Mean Loss 1.778 - Mean Q Value 45.724 - Time Delta 61.524 - Time 2023-11-04T04:52:46


 60%|██████    | 24101/40000 [18:32:28<10:56:56,  2.48s/it]

MarioNet saved to checkpoints2/mario_net_9.chkpt at step 4630315


 60%|██████    | 24120/40000 [18:33:18<12:52:43,  2.92s/it]

Episode 24120 - Step 4633967 - Epsilon 0.31395930764047686 - Mean Reward 979.54 - Mean Length 220.0 - Mean Loss 1.75 - Mean Q Value 45.929 - Time Delta 54.435 - Time 2023-11-04T04:53:40


 60%|██████    | 24140/40000 [18:34:15<14:02:57,  3.19s/it]

Episode 24140 - Step 4637633 - Epsilon 0.31367169571732817 - Mean Reward 982.5 - Mean Length 200.51 - Mean Loss 1.729 - Mean Q Value 46.167 - Time Delta 55.205 - Time 2023-11-04T04:54:36


 60%|██████    | 24160/40000 [18:35:15<14:16:28,  3.24s/it]

Episode 24160 - Step 4641814 - Epsilon 0.3133440016273302 - Mean Reward 968.07 - Mean Length 201.44 - Mean Loss 1.731 - Mean Q Value 46.455 - Time Delta 60.982 - Time 2023-11-04T04:55:37


 60%|██████    | 24180/40000 [18:36:06<8:59:14,  2.05s/it]

Episode 24180 - Step 4645278 - Epsilon 0.3130727631511894 - Mean Reward 939.58 - Mean Length 191.42 - Mean Loss 1.742 - Mean Q Value 46.645 - Time Delta 50.921 - Time 2023-11-04T04:56:28


 60%|██████    | 24200/40000 [18:37:09<12:17:03,  2.80s/it]

Episode 24200 - Step 4649391 - Epsilon 0.3127510114911371 - Mean Reward 936.53 - Mean Length 190.76 - Mean Loss 1.762 - Mean Q Value 46.828 - Time Delta 60.512 - Time 2023-11-04T04:57:28


 61%|██████    | 24201/40000 [18:37:10<10:02:38,  2.29s/it]

MarioNet saved to checkpoints2/mario_net_9.chkpt at step 4649391


 61%|██████    | 24220/40000 [18:38:04<11:14:58,  2.57s/it]

Episode 24220 - Step 4653408 - Epsilon 0.3124370889033269 - Mean Reward 946.14 - Mean Length 194.41 - Mean Loss 1.773 - Mean Q Value 46.927 - Time Delta 58.091 - Time 2023-11-04T04:58:26


 61%|██████    | 24240/40000 [18:39:03<12:55:51,  2.95s/it]

Episode 24240 - Step 4657275 - Epsilon 0.31213518626555015 - Mean Reward 958.01 - Mean Length 196.42 - Mean Loss 1.807 - Mean Q Value 47.191 - Time Delta 55.882 - Time 2023-11-04T04:59:22


 61%|██████    | 24260/40000 [18:39:45<8:20:38,  1.91s/it] 

Episode 24260 - Step 4660358 - Epsilon 0.3118947007296597 - Mean Reward 902.15 - Mean Length 185.44 - Mean Loss 1.843 - Mean Q Value 47.388 - Time Delta 45.169 - Time 2023-11-04T05:00:07


 61%|██████    | 24280/40000 [18:40:39<14:13:42,  3.26s/it]

Episode 24280 - Step 4664016 - Epsilon 0.311609603370803 - Mean Reward 899.29 - Mean Length 187.38 - Mean Loss 1.876 - Mean Q Value 47.496 - Time Delta 53.45 - Time 2023-11-04T05:01:01


 61%|██████    | 24300/40000 [18:41:28<7:07:16,  1.63s/it]

Episode 24300 - Step 4667396 - Epsilon 0.31134640444022665 - Mean Reward 876.17 - Mean Length 180.05 - Mean Loss 1.905 - Mean Q Value 47.527 - Time Delta 47.952 - Time 2023-11-04T05:01:49


 61%|██████    | 24301/40000 [18:41:30<7:28:05,  1.71s/it]

MarioNet saved to checkpoints2/mario_net_9.chkpt at step 4667396


 61%|██████    | 24320/40000 [18:42:24<16:32:48,  3.80s/it]

Episode 24320 - Step 4671295 - Epsilon 0.311043067357468 - Mean Reward 881.53 - Mean Length 178.87 - Mean Loss 1.903 - Mean Q Value 47.807 - Time Delta 55.36 - Time 2023-11-04T05:02:44


 61%|██████    | 24340/40000 [18:43:45<10:43:23,  2.47s/it]

Episode 24340 - Step 4676943 - Epsilon 0.3106041844157143 - Mean Reward 893.48 - Mean Length 196.68 - Mean Loss 1.925 - Mean Q Value 47.968 - Time Delta 81.488 - Time 2023-11-04T05:04:05


 61%|██████    | 24360/40000 [18:44:37<10:38:58,  2.45s/it]

Episode 24360 - Step 4680544 - Epsilon 0.31032468879056285 - Mean Reward 923.85 - Mean Length 201.86 - Mean Loss 1.939 - Mean Q Value 48.053 - Time Delta 53.765 - Time 2023-11-04T05:04:59


 61%|██████    | 24380/40000 [18:45:29<10:22:50,  2.39s/it]

Episode 24380 - Step 4684086 - Epsilon 0.3100500178727626 - Mean Reward 918.42 - Mean Length 200.7 - Mean Loss 1.947 - Mean Q Value 48.152 - Time Delta 51.571 - Time 2023-11-04T05:05:51


 61%|██████    | 24400/40000 [18:46:20<11:09:49,  2.58s/it]

Episode 24400 - Step 4687445 - Epsilon 0.3097897626276508 - Mean Reward 911.15 - Mean Length 200.49 - Mean Loss 1.937 - Mean Q Value 48.283 - Time Delta 48.18 - Time 2023-11-04T05:06:39


 61%|██████    | 24401/40000 [18:46:21<9:06:30,  2.10s/it] 

MarioNet saved to checkpoints2/mario_net_9.chkpt at step 4687445


 61%|██████    | 24420/40000 [18:47:03<18:53:21,  4.36s/it]

Episode 24420 - Step 4690700 - Epsilon 0.30953777371867114 - Mean Reward 882.21 - Mean Length 194.05 - Mean Loss 1.955 - Mean Q Value 48.237 - Time Delta 46.707 - Time 2023-11-04T05:07:26


 61%|██████    | 24440/40000 [18:47:59<11:50:05,  2.74s/it]

Episode 24440 - Step 4694608 - Epsilon 0.3092355029592312 - Mean Reward 878.56 - Mean Length 176.65 - Mean Loss 1.951 - Mean Q Value 48.196 - Time Delta 55.962 - Time 2023-11-04T05:08:22


 61%|██████    | 24460/40000 [18:48:47<11:01:13,  2.55s/it]

Episode 24460 - Step 4697631 - Epsilon 0.30900188648757887 - Mean Reward 842.63 - Mean Length 170.87 - Mean Loss 1.903 - Mean Q Value 48.3 - Time Delta 45.113 - Time 2023-11-04T05:09:07


 61%|██████    | 24480/40000 [18:49:38<10:33:20,  2.45s/it]

Episode 24480 - Step 4701212 - Epsilon 0.3087253763054373 - Mean Reward 848.8 - Mean Length 171.26 - Mean Loss 1.887 - Mean Q Value 48.352 - Time Delta 53.25 - Time 2023-11-04T05:10:00


 61%|██████▏   | 24500/40000 [18:50:37<10:35:53,  2.46s/it]

Episode 24500 - Step 4705164 - Epsilon 0.30842050622611517 - Mean Reward 867.96 - Mean Length 177.19 - Mean Loss 1.875 - Mean Q Value 48.426 - Time Delta 58.895 - Time 2023-11-04T05:10:59


 61%|██████▏   | 24501/40000 [18:50:41<12:11:33,  2.83s/it]

MarioNet saved to checkpoints2/mario_net_9.chkpt at step 4705164


 61%|██████▏   | 24520/40000 [18:51:27<12:29:50,  2.91s/it]

Episode 24520 - Step 4708334 - Epsilon 0.3081761797715031 - Mean Reward 869.89 - Mean Length 176.34 - Mean Loss 1.864 - Mean Q Value 48.577 - Time Delta 47.307 - Time 2023-11-04T05:11:46


 61%|██████▏   | 24540/40000 [18:52:02<8:43:53,  2.03s/it]

Episode 24540 - Step 4710838 - Epsilon 0.3079833218296665 - Mean Reward 812.38 - Mean Length 162.3 - Mean Loss 1.846 - Mean Q Value 48.641 - Time Delta 37.407 - Time 2023-11-04T05:12:24


 61%|██████▏   | 24560/40000 [18:52:46<12:18:33,  2.87s/it]

Episode 24560 - Step 4713648 - Epsilon 0.30776703949708023 - Mean Reward 818.4 - Mean Length 160.17 - Mean Loss 1.869 - Mean Q Value 48.721 - Time Delta 42.657 - Time 2023-11-04T05:13:06


 61%|██████▏   | 24580/40000 [18:53:37<12:21:10,  2.88s/it]

Episode 24580 - Step 4717113 - Epsilon 0.3075005517049572 - Mean Reward 823.13 - Mean Length 159.01 - Mean Loss 1.849 - Mean Q Value 48.967 - Time Delta 51.713 - Time 2023-11-04T05:13:58


 62%|██████▏   | 24600/40000 [18:54:30<13:02:36,  3.05s/it]

Episode 24600 - Step 4720804 - Epsilon 0.30721693640851055 - Mean Reward 808.22 - Mean Length 156.4 - Mean Loss 1.833 - Mean Q Value 49.261 - Time Delta 54.727 - Time 2023-11-04T05:14:53


 62%|██████▏   | 24601/40000 [18:54:34<14:21:09,  3.36s/it]

MarioNet saved to checkpoints2/mario_net_9.chkpt at step 4720804


 62%|██████▏   | 24620/40000 [18:55:39<14:07:38,  3.31s/it]

Episode 24620 - Step 4725384 - Epsilon 0.30686537428005534 - Mean Reward 804.13 - Mean Length 170.5 - Mean Loss 1.825 - Mean Q Value 49.397 - Time Delta 67.489 - Time 2023-11-04T05:16:00


 62%|██████▏   | 24640/40000 [18:56:59<9:24:40,  2.21s/it] 

Episode 24640 - Step 4730833 - Epsilon 0.3064476314713381 - Mean Reward 845.36 - Mean Length 199.95 - Mean Loss 1.837 - Mean Q Value 49.339 - Time Delta 80.341 - Time 2023-11-04T05:17:20


 62%|██████▏   | 24660/40000 [18:57:56<8:27:51,  1.99s/it] 

Episode 24660 - Step 4734643 - Epsilon 0.3061558790350088 - Mean Reward 888.27 - Mean Length 209.95 - Mean Loss 1.826 - Mean Q Value 49.172 - Time Delta 56.18 - Time 2023-11-04T05:18:17


 62%|██████▏   | 24680/40000 [18:58:59<13:40:23,  3.21s/it]

Episode 24680 - Step 4738982 - Epsilon 0.305823956462742 - Mean Reward 921.93 - Mean Length 218.69 - Mean Loss 1.823 - Mean Q Value 48.944 - Time Delta 64.685 - Time 2023-11-04T05:19:21


 62%|██████▏   | 24700/40000 [19:00:12<11:45:01,  2.76s/it]

Episode 24700 - Step 4743718 - Epsilon 0.3054620751290275 - Mean Reward 920.22 - Mean Length 229.14 - Mean Loss 1.853 - Mean Q Value 48.568 - Time Delta 70.007 - Time 2023-11-04T05:20:31


 62%|██████▏   | 24701/40000 [19:00:13<9:32:48,  2.25s/it] 

MarioNet saved to checkpoints2/mario_net_9.chkpt at step 4743718


 62%|██████▏   | 24720/40000 [19:01:10<10:20:04,  2.43s/it]

Episode 24720 - Step 4747768 - Epsilon 0.3051529512596421 - Mean Reward 955.78 - Mean Length 223.84 - Mean Loss 1.865 - Mean Q Value 48.246 - Time Delta 59.917 - Time 2023-11-04T05:21:31


 62%|██████▏   | 24740/40000 [19:02:18<19:22:29,  4.57s/it]

Episode 24740 - Step 4752421 - Epsilon 0.3047981884236334 - Mean Reward 981.09 - Mean Length 215.88 - Mean Loss 1.869 - Mean Q Value 48.126 - Time Delta 68.882 - Time 2023-11-04T05:22:40


 62%|██████▏   | 24760/40000 [19:03:14<11:07:09,  2.63s/it]

Episode 24760 - Step 4756132 - Epsilon 0.30451554300137246 - Mean Reward 970.09 - Mean Length 214.89 - Mean Loss 1.883 - Mean Q Value 48.018 - Time Delta 54.732 - Time 2023-11-04T05:23:35


 62%|██████▏   | 24780/40000 [19:04:04<8:50:19,  2.09s/it]

Episode 24780 - Step 4759428 - Epsilon 0.30426472551354783 - Mean Reward 891.35 - Mean Length 204.46 - Mean Loss 1.906 - Mean Q Value 47.866 - Time Delta 48.651 - Time 2023-11-04T05:24:24


 62%|██████▏   | 24800/40000 [19:05:07<14:57:55,  3.54s/it]

Episode 24800 - Step 4763828 - Epsilon 0.3039302182863268 - Mean Reward 936.48 - Mean Length 201.1 - Mean Loss 1.915 - Mean Q Value 47.79 - Time Delta 64.553 - Time 2023-11-04T05:25:28


 62%|██████▏   | 24801/40000 [19:05:10<14:26:05,  3.42s/it]

MarioNet saved to checkpoints2/mario_net_9.chkpt at step 4763828


 62%|██████▏   | 24820/40000 [19:06:05<14:49:39,  3.52s/it]

Episode 24820 - Step 4767916 - Epsilon 0.30361976023547216 - Mean Reward 962.28 - Mean Length 201.48 - Mean Loss 1.938 - Mean Q Value 47.72 - Time Delta 59.898 - Time 2023-11-04T05:26:28


 62%|██████▏   | 24840/40000 [19:07:04<14:33:50,  3.46s/it]

Episode 24840 - Step 4771817 - Epsilon 0.3033237993686414 - Mean Reward 947.36 - Mean Length 193.96 - Mean Loss 1.95 - Mean Q Value 47.742 - Time Delta 57.002 - Time 2023-11-04T05:27:25


 62%|██████▏   | 24860/40000 [19:08:10<13:31:41,  3.22s/it]

Episode 24860 - Step 4776915 - Epsilon 0.30293745938544964 - Mean Reward 945.67 - Mean Length 207.83 - Mean Loss 1.96 - Mean Q Value 47.792 - Time Delta 75.386 - Time 2023-11-04T05:28:40


 62%|██████▏   | 24880/40000 [19:09:18<11:34:45,  2.76s/it]

Episode 24880 - Step 4780953 - Epsilon 0.30263179829034625 - Mean Reward 1017.22 - Mean Length 215.25 - Mean Loss 1.974 - Mean Q Value 47.777 - Time Delta 59.855 - Time 2023-11-04T05:29:40


 62%|██████▏   | 24900/40000 [19:10:18<9:22:58,  2.24s/it] 

Episode 24900 - Step 4784836 - Epsilon 0.3023381609826762 - Mean Reward 983.13 - Mean Length 210.08 - Mean Loss 1.994 - Mean Q Value 47.747 - Time Delta 57.354 - Time 2023-11-04T05:30:38


 62%|██████▏   | 24901/40000 [19:10:19<8:04:18,  1.92s/it]

MarioNet saved to checkpoints2/mario_net_9.chkpt at step 4784836


 62%|██████▏   | 24920/40000 [19:11:25<13:43:56,  3.28s/it]

Episode 24920 - Step 4789611 - Epsilon 0.3019774600940415 - Mean Reward 987.37 - Mean Length 216.95 - Mean Loss 1.976 - Mean Q Value 47.764 - Time Delta 71.83 - Time 2023-11-04T05:31:49


 62%|██████▏   | 24940/40000 [19:12:11<7:33:48,  1.81s/it]

Episode 24940 - Step 4792430 - Epsilon 0.3017647164267726 - Mean Reward 940.12 - Mean Length 206.13 - Mean Loss 1.969 - Mean Q Value 47.706 - Time Delta 42.437 - Time 2023-11-04T05:32:32


 62%|██████▏   | 24960/40000 [19:12:56<8:45:24,  2.10s/it] 

Episode 24960 - Step 4795477 - Epsilon 0.3015349346545226 - Mean Reward 916.61 - Mean Length 185.62 - Mean Loss 1.95 - Mean Q Value 47.639 - Time Delta 46.144 - Time 2023-11-04T05:33:18


 62%|██████▏   | 24980/40000 [19:13:55<13:13:08,  3.17s/it]

Episode 24980 - Step 4799450 - Epsilon 0.3012355837329775 - Mean Reward 902.53 - Mean Length 184.97 - Mean Loss 1.926 - Mean Q Value 47.693 - Time Delta 59.295 - Time 2023-11-04T05:34:17


 62%|██████▎   | 25000/40000 [19:14:44<7:01:08,  1.68s/it]

Episode 25000 - Step 4802555 - Epsilon 0.30100184031525323 - Mean Reward 877.45 - Mean Length 177.19 - Mean Loss 1.883 - Mean Q Value 47.716 - Time Delta 46.09 - Time 2023-11-04T05:35:03


 63%|██████▎   | 25001/40000 [19:14:45<6:16:21,  1.51s/it]

MarioNet saved to checkpoints2/mario_net_9.chkpt at step 4802555


 63%|██████▎   | 25020/40000 [19:15:32<5:32:04,  1.33s/it]

Episode 25020 - Step 4805941 - Epsilon 0.30074715003842245 - Mean Reward 829.37 - Mean Length 163.3 - Mean Loss 1.851 - Mean Q Value 47.788 - Time Delta 49.957 - Time 2023-11-04T05:35:53


 63%|██████▎   | 25040/40000 [19:16:27<15:17:49,  3.68s/it]

Episode 25040 - Step 4809579 - Epsilon 0.30047374482111794 - Mean Reward 843.86 - Mean Length 171.49 - Mean Loss 1.819 - Mean Q Value 47.81 - Time Delta 53.288 - Time 2023-11-04T05:36:47


 63%|██████▎   | 25060/40000 [19:17:11<6:18:43,  1.52s/it]

Episode 25060 - Step 4812875 - Epsilon 0.3002262564036458 - Mean Reward 859.71 - Mean Length 173.98 - Mean Loss 1.836 - Mean Q Value 47.753 - Time Delta 48.139 - Time 2023-11-04T05:37:35


 63%|██████▎   | 25080/40000 [19:18:10<17:57:43,  4.33s/it]

Episode 25080 - Step 4816802 - Epsilon 0.2999316538762347 - Mean Reward 848.81 - Mean Length 173.52 - Mean Loss 1.828 - Mean Q Value 47.702 - Time Delta 56.408 - Time 2023-11-04T05:38:31


 63%|██████▎   | 25100/40000 [19:19:06<9:53:22,  2.39s/it] 

Episode 25100 - Step 4820767 - Epsilon 0.29963449389139196 - Mean Reward 893.19 - Mean Length 182.12 - Mean Loss 1.834 - Mean Q Value 47.587 - Time Delta 56.341 - Time 2023-11-04T05:39:28


 63%|██████▎   | 25101/40000 [19:19:09<11:21:13,  2.74s/it]

MarioNet saved to checkpoints2/mario_net_9.chkpt at step 4820767


 63%|██████▎   | 25120/40000 [19:19:53<12:08:47,  2.94s/it]

Episode 25120 - Step 4824175 - Epsilon 0.29937931399273615 - Mean Reward 903.32 - Mean Length 182.34 - Mean Loss 1.856 - Mean Q Value 47.478 - Time Delta 48.807 - Time 2023-11-04T05:40:16


 63%|██████▎   | 25140/40000 [19:20:43<10:34:53,  2.56s/it]

Episode 25140 - Step 4827617 - Epsilon 0.2991218088682918 - Mean Reward 930.42 - Mean Length 180.38 - Mean Loss 1.871 - Mean Q Value 47.662 - Time Delta 48.909 - Time 2023-11-04T05:41:05


 63%|██████▎   | 25160/40000 [19:21:32<10:37:51,  2.58s/it]

Episode 25160 - Step 4830908 - Epsilon 0.29887580758191684 - Mean Reward 911.55 - Mean Length 180.33 - Mean Loss 1.859 - Mean Q Value 47.958 - Time Delta 46.189 - Time 2023-11-04T05:41:51


 63%|██████▎   | 25180/40000 [19:22:19<6:51:29,  1.67s/it]

Episode 25180 - Step 4834310 - Epsilon 0.2986217217410926 - Mean Reward 899.75 - Mean Length 175.08 - Mean Loss 1.871 - Mean Q Value 48.272 - Time Delta 48.015 - Time 2023-11-04T05:42:39


 63%|██████▎   | 25200/40000 [19:23:36<11:52:00,  2.89s/it]

Episode 25200 - Step 4839864 - Epsilon 0.2982073731566761 - Mean Reward 894.03 - Mean Length 190.97 - Mean Loss 1.867 - Mean Q Value 48.477 - Time Delta 79.477 - Time 2023-11-04T05:43:59


 63%|██████▎   | 25201/40000 [19:23:41<13:36:58,  3.31s/it]

MarioNet saved to checkpoints2/mario_net_9.chkpt at step 4839864


 63%|██████▎   | 25220/40000 [19:24:27<10:21:40,  2.52s/it]

Episode 25220 - Step 4843339 - Epsilon 0.29794841796878774 - Mean Reward 871.47 - Mean Length 191.64 - Mean Loss 1.868 - Mean Q Value 48.661 - Time Delta 49.394 - Time 2023-11-04T05:44:48


 63%|██████▎   | 25240/40000 [19:25:19<10:43:00,  2.61s/it]

Episode 25240 - Step 4846942 - Epsilon 0.29768016173177264 - Mean Reward 868.25 - Mean Length 193.25 - Mean Loss 1.845 - Mean Q Value 48.637 - Time Delta 51.397 - Time 2023-11-04T05:45:40


 63%|██████▎   | 25260/40000 [19:26:07<8:56:52,  2.19s/it]

Episode 25260 - Step 4850245 - Epsilon 0.29743445376811173 - Mean Reward 901.34 - Mean Length 193.37 - Mean Loss 1.82 - Mean Q Value 48.567 - Time Delta 47.319 - Time 2023-11-04T05:46:27


 63%|██████▎   | 25280/40000 [19:26:56<11:06:10,  2.72s/it]

Episode 25280 - Step 4853827 - Epsilon 0.2971682204052256 - Mean Reward 913.07 - Mean Length 195.17 - Mean Loss 1.794 - Mean Q Value 48.501 - Time Delta 50.737 - Time 2023-11-04T05:47:18


 63%|██████▎   | 25300/40000 [19:27:39<7:13:11,  1.77s/it]

Episode 25300 - Step 4856965 - Epsilon 0.2969351833279013 - Mean Reward 888.34 - Mean Length 171.01 - Mean Loss 1.749 - Mean Q Value 48.655 - Time Delta 44.591 - Time 2023-11-04T05:48:02


 63%|██████▎   | 25301/40000 [19:27:44<10:52:25,  2.66s/it]

MarioNet saved to checkpoints2/mario_net_9.chkpt at step 4856965


 63%|██████▎   | 25320/40000 [19:28:34<9:27:00,  2.32s/it] 

Episode 25320 - Step 4860614 - Epsilon 0.2966644276902205 - Mean Reward 916.65 - Mean Length 172.75 - Mean Loss 1.707 - Mean Q Value 48.836 - Time Delta 51.817 - Time 2023-11-04T05:48:54


 63%|██████▎   | 25340/40000 [19:29:24<8:26:06,  2.07s/it] 

Episode 25340 - Step 4864055 - Epsilon 0.29640933182322 - Mean Reward 911.94 - Mean Length 171.13 - Mean Loss 1.694 - Mean Q Value 49.094 - Time Delta 49.179 - Time 2023-11-04T05:49:43


 63%|██████▎   | 25360/40000 [19:30:19<9:28:25,  2.33s/it] 

Episode 25360 - Step 4868162 - Epsilon 0.29610514968993057 - Mean Reward 941.17 - Mean Length 179.17 - Mean Loss 1.677 - Mean Q Value 49.288 - Time Delta 58.246 - Time 2023-11-04T05:50:42


 63%|██████▎   | 25380/40000 [19:31:22<12:55:48,  3.18s/it]

Episode 25380 - Step 4872455 - Epsilon 0.29578752527396956 - Mean Reward 953.53 - Mean Length 186.28 - Mean Loss 1.667 - Mean Q Value 49.43 - Time Delta 61.245 - Time 2023-11-04T05:51:43


 64%|██████▎   | 25400/40000 [19:32:27<13:14:15,  3.26s/it]

Episode 25400 - Step 4877023 - Epsilon 0.295449928682205 - Mean Reward 995.97 - Mean Length 200.58 - Mean Loss 1.671 - Mean Q Value 49.562 - Time Delta 65.367 - Time 2023-11-04T05:52:48


 64%|██████▎   | 25401/40000 [19:32:30<13:10:02,  3.25s/it]

MarioNet saved to checkpoints2/mario_net_9.chkpt at step 4877023


 64%|██████▎   | 25420/40000 [19:33:18<10:33:16,  2.61s/it]

Episode 25420 - Step 4880566 - Epsilon 0.29518834973901753 - Mean Reward 999.43 - Mean Length 199.52 - Mean Loss 1.688 - Mean Q Value 49.71 - Time Delta 50.413 - Time 2023-11-04T05:53:39


 64%|██████▎   | 25440/40000 [19:34:17<15:08:48,  3.75s/it]

Episode 25440 - Step 4884573 - Epsilon 0.29489279283446124 - Mean Reward 1036.52 - Mean Length 205.18 - Mean Loss 1.696 - Mean Q Value 49.813 - Time Delta 57.594 - Time 2023-11-04T05:54:36


 64%|██████▎   | 25460/40000 [19:35:05<13:00:17,  3.22s/it]

Episode 25460 - Step 4888087 - Epsilon 0.29463384324381214 - Mean Reward 1007.49 - Mean Length 199.25 - Mean Loss 1.687 - Mean Q Value 49.991 - Time Delta 50.708 - Time 2023-11-04T05:55:27


 64%|██████▎   | 25480/40000 [19:35:56<9:56:31,  2.46s/it]

Episode 25480 - Step 4891652 - Epsilon 0.2943713677811766 - Mean Reward 990.91 - Mean Length 191.97 - Mean Loss 1.681 - Mean Q Value 50.205 - Time Delta 51.232 - Time 2023-11-04T05:56:18


 64%|██████▍   | 25500/40000 [19:36:53<13:36:40,  3.38s/it]

Episode 25500 - Step 4895645 - Epsilon 0.29407765814874487 - Mean Reward 972.83 - Mean Length 186.22 - Mean Loss 1.682 - Mean Q Value 50.372 - Time Delta 56.846 - Time 2023-11-04T05:57:15


 64%|██████▍   | 25501/40000 [19:36:57<13:43:36,  3.41s/it]

MarioNet saved to checkpoints2/mario_net_9.chkpt at step 4895645


 64%|██████▍   | 25520/40000 [19:37:48<11:31:48,  2.87s/it]

Episode 25520 - Step 4899525 - Epsilon 0.2937925410887912 - Mean Reward 980.68 - Mean Length 189.59 - Mean Loss 1.664 - Mean Q Value 50.466 - Time Delta 55.238 - Time 2023-11-04T05:58:10


 64%|██████▍   | 25540/40000 [19:38:46<9:11:26,  2.29s/it]

Episode 25540 - Step 4903555 - Epsilon 0.29349669412455515 - Mean Reward 952.41 - Mean Length 189.82 - Mean Loss 1.67 - Mean Q Value 50.503 - Time Delta 57.166 - Time 2023-11-04T05:59:07


 64%|██████▍   | 25560/40000 [19:39:52<10:10:54,  2.54s/it]

Episode 25560 - Step 4908248 - Epsilon 0.29315255100753906 - Mean Reward 1009.61 - Mean Length 201.61 - Mean Loss 1.693 - Mean Q Value 50.458 - Time Delta 66.729 - Time 2023-11-04T06:00:14


 64%|██████▍   | 25580/40000 [19:40:48<13:22:02,  3.34s/it]

Episode 25580 - Step 4912154 - Epsilon 0.2928664272283425 - Mean Reward 1039.53 - Mean Length 205.02 - Mean Loss 1.705 - Mean Q Value 50.509 - Time Delta 55.049 - Time 2023-11-04T06:01:09


 64%|██████▍   | 25600/40000 [19:41:52<18:49:19,  4.71s/it]

Episode 25600 - Step 4916659 - Epsilon 0.2925367720453436 - Mean Reward 1055.84 - Mean Length 210.14 - Mean Loss 1.715 - Mean Q Value 50.468 - Time Delta 63.92 - Time 2023-11-04T06:02:13


 64%|██████▍   | 25601/40000 [19:41:55<16:53:45,  4.22s/it]

MarioNet saved to checkpoints2/mario_net_9.chkpt at step 4916659


 64%|██████▍   | 25620/40000 [19:42:33<7:29:43,  1.88s/it]

Episode 25620 - Step 4919462 - Epsilon 0.2923318486850973 - Mean Reward 1006.25 - Mean Length 199.37 - Mean Loss 1.713 - Mean Q Value 50.52 - Time Delta 39.941 - Time 2023-11-04T06:02:53


 64%|██████▍   | 25640/40000 [19:43:27<11:02:39,  2.77s/it]

Episode 25640 - Step 4923325 - Epsilon 0.2920496654480799 - Mean Reward 1007.32 - Mean Length 197.7 - Mean Loss 1.722 - Mean Q Value 50.563 - Time Delta 54.625 - Time 2023-11-04T06:03:48


 64%|██████▍   | 25660/40000 [19:44:28<13:18:16,  3.34s/it]

Episode 25660 - Step 4927746 - Epsilon 0.29172705582995273 - Mean Reward 984.68 - Mean Length 194.98 - Mean Loss 1.722 - Mean Q Value 50.611 - Time Delta 62.703 - Time 2023-11-04T06:04:50


 64%|██████▍   | 25680/40000 [19:45:27<11:39:42,  2.93s/it]

Episode 25680 - Step 4931780 - Epsilon 0.29143299736125566 - Mean Reward 973.68 - Mean Length 196.26 - Mean Loss 1.725 - Mean Q Value 50.56 - Time Delta 57.736 - Time 2023-11-04T06:05:48


 64%|██████▍   | 25700/40000 [19:46:15<9:01:21,  2.27s/it]

Episode 25700 - Step 4935252 - Epsilon 0.2911801432424685 - Mean Reward 940.03 - Mean Length 185.93 - Mean Loss 1.733 - Mean Q Value 50.636 - Time Delta 49.644 - Time 2023-11-04T06:06:38


 64%|██████▍   | 25701/40000 [19:46:19<11:33:59,  2.91s/it]

MarioNet saved to checkpoints2/mario_net_9.chkpt at step 4935252


 64%|██████▍   | 25720/40000 [19:47:02<9:18:22,  2.35s/it]

Episode 25720 - Step 4938848 - Epsilon 0.2909184898917543 - Mean Reward 968.12 - Mean Length 193.86 - Mean Loss 1.756 - Mean Q Value 50.744 - Time Delta 50.923 - Time 2023-11-04T06:07:29


 64%|██████▍   | 25740/40000 [19:48:04<15:00:37,  3.79s/it]

Episode 25740 - Step 4942917 - Epsilon 0.2906227034907619 - Mean Reward 967.26 - Mean Length 195.92 - Mean Loss 1.77 - Mean Q Value 50.967 - Time Delta 57.856 - Time 2023-11-04T06:08:27


 64%|██████▍   | 25760/40000 [19:48:56<12:20:25,  3.12s/it]

Episode 25760 - Step 4946757 - Epsilon 0.2903438395366302 - Mean Reward 934.75 - Mean Length 190.11 - Mean Loss 1.788 - Mean Q Value 51.222 - Time Delta 54.103 - Time 2023-11-04T06:09:21


 64%|██████▍   | 25780/40000 [19:50:02<13:36:09,  3.44s/it]

Episode 25780 - Step 4951063 - Epsilon 0.29003145252675205 - Mean Reward 937.69 - Mean Length 192.83 - Mean Loss 1.808 - Mean Q Value 51.4 - Time Delta 60.974 - Time 2023-11-04T06:10:22


 64%|██████▍   | 25800/40000 [19:50:52<8:16:16,  2.10s/it]

Episode 25800 - Step 4954715 - Epsilon 0.28976677462132155 - Mean Reward 931.87 - Mean Length 194.63 - Mean Loss 1.847 - Mean Q Value 51.589 - Time Delta 52.221 - Time 2023-11-04T06:11:14


 65%|██████▍   | 25801/40000 [19:50:56<10:29:43,  2.66s/it]

MarioNet saved to checkpoints2/mario_net_9.chkpt at step 4954715


 65%|██████▍   | 25820/40000 [19:51:47<6:29:47,  1.65s/it]

Episode 25820 - Step 4958545 - Epsilon 0.2894894556875459 - Mean Reward 939.77 - Mean Length 196.97 - Mean Loss 1.875 - Mean Q Value 51.587 - Time Delta 54.579 - Time 2023-11-04T06:12:08


 65%|██████▍   | 25840/40000 [19:52:37<9:57:17,  2.53s/it]

Episode 25840 - Step 4962075 - Epsilon 0.2892340939059052 - Mean Reward 924.61 - Mean Length 191.58 - Mean Loss 1.903 - Mean Q Value 51.506 - Time Delta 49.952 - Time 2023-11-04T06:12:58


 65%|██████▍   | 25860/40000 [19:53:52<15:39:26,  3.99s/it]

Episode 25860 - Step 4967454 - Epsilon 0.28884540771093986 - Mean Reward 977.9 - Mean Length 206.97 - Mean Loss 1.928 - Mean Q Value 51.433 - Time Delta 76.496 - Time 2023-11-04T06:14:15


 65%|██████▍   | 25880/40000 [19:54:45<11:20:07,  2.89s/it]

Episode 25880 - Step 4971040 - Epsilon 0.28858657381016734 - Mean Reward 945.8 - Mean Length 199.77 - Mean Loss 1.962 - Mean Q Value 51.341 - Time Delta 50.575 - Time 2023-11-04T06:15:05


 65%|██████▍   | 25900/40000 [19:55:54<7:48:16,  1.99s/it]

Episode 25900 - Step 4975981 - Epsilon 0.2882303172785265 - Mean Reward 914.45 - Mean Length 212.66 - Mean Loss 1.974 - Mean Q Value 51.091 - Time Delta 68.974 - Time 2023-11-04T06:16:14


 65%|██████▍   | 25901/40000 [19:55:56<7:38:21,  1.95s/it]

MarioNet saved to checkpoints2/mario_net_9.chkpt at step 4975981


 65%|██████▍   | 25908/40000 [19:56:15<11:42:25,  2.99s/it]