# CNN for text plagiarism detection

In [1]:
import contextlib
import copy
import json
import math
import os
import shutil
import time
from typing import Literal

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from cycler import cycler
from torch.utils.data import DataLoader, Dataset
from torchtext.data import get_tokenizer
from torchtext.vocab import GloVe
from tqdm import tqdm

In [2]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# DEVICE = torch.device("cpu")
DEVICE

device(type='cuda')

In [3]:
# Root directory for experiment logs. A "_<CNN_DIM>d" suffix is spliced in
# further down, once CNN_DIM has its final value.
LOG_PATH_PREFIX = "../logs/cnn/"

## Hyperparameters

In [4]:
# Embedding size and dataset selection.
GLOVE_DIM = 300  # passed as GloVe(dim=...)
TRAIN_SIZE = "md"  # selects ../generated/datasets/train_<TRAIN_SIZE>.csv
TEST_SIZE = "sm"  # selects ../generated/datasets/test_<TEST_SIZE>.csv

# Convolutional architecture.
EMBED_DIM = GLOVE_DIM  # in_channels of the convolution
CNN_DIM = 1  # 1 or 2
OUT_CHANNELS = 3
KERNEL_SIZE = 4
MAX_POOL_KERNEL = 2
MAX_POOL_STRIDE = 2
DROPOUT = 0.5

# Learning rate handed to the Adam optimizer.
LR = 0.005

# Maximum gradient norm; clipping is only applied when this is > 0.
CLIP_GRAD = 0.0

BATCH_SIZE = 10

# Free-form annotations stored alongside the other hyperparameters.
FEATURES = ""
COMMENTS = ""


# Name of this experiment's directory under LOG_PATH_PREFIX.
EXPERIMENT_NAME = "test"

# Number of training-loop iterations (each iteration trains on one batch).
EPOCHS = 10
# The test set is evaluated every EVAL_PERIOD iterations.
EVAL_PERIOD = 1
# Checkpoints, data and plots are written every LOG_PERIOD iterations.
LOG_PERIOD = EVAL_PERIOD

In [5]:
# Parameters
# Overrides of the defaults defined above (presumably injected by a notebook
# runner such as papermill — TODO confirm).
# NOTE(review): LOG_PERIOD was already computed from the earlier EVAL_PERIOD
# value, so overriding EVAL_PERIOD here does not change LOG_PERIOD — confirm
# that logging on every iteration is intended.
EXPERIMENT_NAME = "20_04"
EPOCHS = 5500
EVAL_PERIOD = 10
CNN_DIM = 1

In [6]:
# Only 1D and 2D convolutions are supported; anything else falls back to 1D.
if CNN_DIM not in (1, 2):
    CNN_DIM = 1

# Drop the trailing "/" and append the dimensionality, e.g. ".../cnn/" -> ".../cnn_1d/".
LOG_PATH_PREFIX = f"{LOG_PATH_PREFIX[:-1]}_{CNN_DIM}d/"

In [7]:
def pack_hyperparameters() -> dict:
    """Snapshot the module-level hyperparameters into a dictionary.

    Returns:
        A dict mapping each hyperparameter name to its current global value,
        in the same key order that ``load_from_experiments`` restores them.
    """
    return dict(
        GLOVE_DIM=GLOVE_DIM,
        TRAIN_SIZE=TRAIN_SIZE,
        TEST_SIZE=TEST_SIZE,
        EMBED_DIM=EMBED_DIM,
        CNN_DIM=CNN_DIM,
        OUT_CHANNELS=OUT_CHANNELS,
        KERNEL_SIZE=KERNEL_SIZE,
        MAX_POOL_KERNEL=MAX_POOL_KERNEL,
        MAX_POOL_STRIDE=MAX_POOL_STRIDE,
        DROPOUT=DROPOUT,
        LR=LR,
        CLIP_GRAD=CLIP_GRAD,
        BATCH_SIZE=BATCH_SIZE,
        FEATURES=FEATURES,
        COMMENTS=COMMENTS,
    )


def load_from_experiments(experiment: str) -> None:
    """Restore the hyperparameter globals from a stored experiment.

    Reads ``configs.json`` from ``./<LOG_PATH_PREFIX>/<experiment>/``, assigns
    every hyperparameter to the module-level variable of the same name and
    prints the loaded dictionary.

    Args:
        experiment: Name of the experiment directory under ``LOG_PATH_PREFIX``.

    Raises:
        KeyError: If the stored configs lack one of the hyperparameters.
    """
    config_path = os.path.join(".", LOG_PATH_PREFIX, experiment, "configs.json")
    with open(config_path, "r") as f:
        hyper_dict = json.load(f)

    # Assign in a fixed order so a missing key fails at the same point every time.
    module_scope = globals()
    for name in (
        "GLOVE_DIM",
        "TRAIN_SIZE",
        "TEST_SIZE",
        "EMBED_DIM",
        "CNN_DIM",
        "OUT_CHANNELS",
        "KERNEL_SIZE",
        "MAX_POOL_KERNEL",
        "MAX_POOL_STRIDE",
        "DROPOUT",
        "LR",
        "CLIP_GRAD",
        "BATCH_SIZE",
        "FEATURES",
        "COMMENTS",
    ):
        module_scope[name] = hyper_dict[name]

    print(hyper_dict)

## Utilities

In [8]:
# Hex colors that get_plots cycles through when use_rainbow is False.
PREDEFINED_COLORS = [
    "#ffa500",
    "#c83cbc",
    "#1c1c84",
    "#ff0000",
    "#08a4a7",
    "#008000",
]


def get_plots(
    data_dict: dict,
    plots: list[tuple[dict, dict]],
    title: str = "",
    ylim=None,
    row_plots: int = 1,
    plot_width: float = 8,
    plot_height: float = 4,
    use_rainbow: bool = False,
    use_common_legend: bool = False,
    adjust: bool = False,
):
    """Build a grid of line plots from named series and return the figure.

    Args:
        data_dict: Maps series keys to their values. Each axis/series spec is
            looked up by its ``"ref"`` key when present, otherwise by its
            ``"axis_name"`` (or ``"name"`` for entries of ``"values"``).
        plots: One ``(x_spec, y_spec)`` pair per subplot. Both specs provide
            ``"axis_name"`` and ``"axis_label"`` and may set ``"log"`` for a
            logarithmic scale. ``y_spec`` may carry a ``"values"`` list of
            ``{"name": ..., "ref": ...}`` dicts to draw several series.
        title: Figure title; omitted when empty.
        ylim: Upper y-limit applied to every subplot when not ``None``.
        row_plots: Number of subplots per row (grid columns).
        plot_width: Width of a single subplot.
        plot_height: Height of a single subplot.
        use_rainbow: Sample colors from the ``gist_rainbow`` colormap instead
            of ``PREDEFINED_COLORS``.
        use_common_legend: Draw one figure-level legend built from the first
            subplot instead of one legend per subplot.
        adjust: Apply an extra ``subplots_adjust`` after ``tight_layout``.

    Returns:
        The created matplotlib figure. It is left open; closing it is up to
        the caller.
    """
    num_plots = len(plots)
    # NOTE(review): this counts the keys of each y-spec dict (plus one), not the
    # number of plotted series — confirm that is the intended "entity" count.
    num_entities = max([len(x[1]) for x in plots]) + 1
    if use_rainbow:
        num_colors = num_entities
        cm = plt.get_cmap("gist_rainbow")
        colors = [cm(1.0 * i / num_colors) for i in range(num_colors)]
    else:
        colors = PREDEFINED_COLORS

    # Product of cyclers: every line style is combined with every color.
    style_cycler = cycler(linestyle=["-", "--", ":", "-."]) * cycler(color=colors)
    # Despite its name, column_plots is the number of grid rows.
    column_plots = math.ceil(num_plots / row_plots)

    fig, axs = plt.subplots(
        column_plots,
        row_plots,
        figsize=(plot_width * row_plots, plot_height * column_plots),
    )

    if len(title) > 0:
        fig.suptitle(title, fontsize=14, y=1)
    # plt.subplots returns a bare Axes for a 1x1 grid and an array otherwise.
    axs_list = [axs] if column_plots * row_plots == 1 else list(axs.flat)

    # Style every cell and hide it; only cells that receive a plot are shown
    # again below, so surplus grid cells stay invisible.
    for ax in axs_list:
        ax.grid()
        ax.set_prop_cycle(style_cycler)
        if ylim is not None:
            ax.set_ylim(top=ylim)
        ax.set_visible(False)

    for ax, (p1, p2) in zip(axs_list, plots):
        ax.set_visible(True)

        ax.set_title(f"{p2['axis_name']} over {p1['axis_name']}")
        ax.set(xlabel=p1["axis_label"], ylabel=p2["axis_label"])

        if p1.get("log", False):
            ax.set_xscale("log")
        if p2.get("log", False):
            ax.set_yscale("log")

        x_values = data_dict[p1.get("ref", None) or p1["axis_name"]]

        p2vs = p2.get("values", [])
        if len(p2vs) == 0:
            # Single series: the y-spec itself names the data to plot.
            y_values = data_dict[p2.get("ref", None) or p2["axis_name"]]
            ax.plot(x_values, y_values, label=p2["axis_name"])
            # Mark the most recent point of the series.
            ax.scatter(x_values[-1], y_values[-1], s=15)
            continue

        for p2v in p2vs:
            y_values = data_dict[p2v.get("ref", None) or p2v["name"]]

            try:
                # iter() raises TypeError for a scalar, which is then drawn as
                # a constant horizontal line in the except branch.
                iter(y_values)
                ax.plot(x_values, y_values, label=p2v["name"])
                ax.scatter(x_values[-1], y_values[-1], s=15)
            except TypeError:
                ax.plot(x_values, [y_values] * len(x_values), label=p2v["name"])

    if use_common_legend:
        # Reuse the handles/labels of the first subplot for the shared legend.
        lines_labels = [axs_list[0].get_legend_handles_labels()]
        lines, labels = [sum(x, []) for x in zip(*lines_labels)]
        fig.legend(
            lines,
            labels,
            scatterpoints=1,
            markerscale=3,
            loc="outside lower center",
            ncol=min(6, num_entities),
            bbox_to_anchor=(0.5, -0.05),
        )
    else:
        if num_entities > 1:
            for ax, _ in zip(axs_list, plots):
                ax.legend()

    plt.tight_layout()
    if adjust:
        plt.subplots_adjust(
            top=1 - 0.1 / (num_plots**0.5), bottom=0.12 / (num_plots**2), hspace=0.15
        )

    return fig


def draw_plots(
    data_dict: dict,
    plots: list[tuple[dict, dict]],
    title: str = "",
    ylim=None,
    row_plots: int = 1,
    plot_width: float = 8,
    plot_height: float = 4,
    use_rainbow: bool = False,
    use_common_legend: bool = False,
    adjust: bool = False,
):
    """Build the figure with ``get_plots`` and display it immediately.

    All arguments are forwarded unchanged to ``get_plots``; see that function
    for their meaning. Nothing is returned.
    """
    get_plots(
        data_dict,
        plots,
        title=title,
        ylim=ylim,
        row_plots=row_plots,
        plot_width=plot_width,
        plot_height=plot_height,
        use_rainbow=use_rainbow,
        use_common_legend=use_common_legend,
        adjust=adjust,
    )
    plt.show()

## Logger

In [9]:
# Kinds of artifacts that Logger.save accepts.
LoggerEntity = Literal["data", "model", "best", "plots"]


class Logger:
    """Filesystem-backed experiment logger.

    Everything is stored under ``./<path_prefix>/<path>/``:

    * ``configs.json`` - the hyperparameters of the experiment,
    * ``best.json`` - ``{epoch: loss}`` of the best evaluation so far,
    * ``best/<epoch>/net`` - state dicts of the best model and optimizer,
    * ``<idx>/models``, ``<idx>/data``, ``<idx>/plots`` - artifacts of the
      snapshot with index ``idx``.

    Attributes:
        new: ``True`` until the first ``update_idx`` call when the experiment
            directory did not exist at construction time.
        idx: Index of the snapshot directory currently written to.
        dirs: Paths of the directories for the current snapshot.
    """

    _plot_epoch_axis = {"axis_name": "Epoch", "axis_label": "Epoch"}
    _plot_time_axis = {"axis_name": "Time", "axis_label": "Running time, seconds"}
    # (x_spec, y_spec) pairs in the format expected by get_plots.
    log_plots = [
        (
            _plot_epoch_axis,
            {"axis_name": "Mean Loss", "axis_label": "Loss"},
        ),
        (
            _plot_time_axis,
            {"axis_name": "Mean Loss", "axis_label": "Loss"},
        ),
        (
            {**_plot_epoch_axis, "ref": "Eval Epoch"},
            {"axis_name": "Mean Test Loss", "axis_label": "Loss"},
        ),
        (
            {**_plot_time_axis, "ref": "Eval Time"},
            {"axis_name": "Mean Test Loss", "axis_label": "Loss"},
        ),
    ]

    def _soft_mkdir(self, path: str) -> None:
        """Create ``path`` (with parents), ignoring any failure (best effort)."""
        with contextlib.suppress(Exception):
            os.makedirs(path)

    def _soft_rmdir(self, path: str) -> None:
        """Remove the directory tree at ``path``, ignoring any failure."""
        with contextlib.suppress(Exception):
            shutil.rmtree(path)

    def _get_max_index(self, path: str) -> int:
        """Return the largest integer-named entry in ``path`` (0 if none)."""
        max_index = 0
        for file in os.listdir(path):
            # Entries such as "best" or "configs.json" are not snapshot indices.
            with contextlib.suppress(ValueError):
                max_index = max(max_index, int(os.path.splitext(file)[0]))
        return max_index

    def _remove_old(self):
        """Delete snapshots other than the last ``keep_last`` and snapshot 0.

        Pruning only runs when ``idx`` is a multiple of ``keep_last``. A
        non-positive ``keep_last`` disables pruning (``-1`` is the sentinel
        used for "keep everything"; ``0`` would otherwise divide by zero).
        """
        if self.keep_last <= 0:
            return
        if self.idx % self.keep_last != 0:
            return

        retain = {str(x) for x in range(self.idx, self.idx - self.keep_last, -1)}
        # Bookkeeping entries are listed explicitly; "best.json" used to survive
        # only because removing a regular file as a directory tree fails.
        retain |= {"0", "best", "best.json", "configs.json"}

        abs_prefix = os.path.abspath(self.path)
        for item in os.listdir(self.path):
            if item not in retain:
                self._soft_rmdir(os.path.join(abs_prefix, item))

    def _save_json(self, path: str, data: dict) -> None:
        """Serialize ``data`` as JSON into the file at ``path``."""
        with open(path, "w") as f:
            json.dump(data, f)

    def _load_json(self, path: str) -> dict:
        """Parse and return the JSON document stored at ``path``."""
        with open(path, "r") as f:
            return json.load(f)

    def _save_configs(self) -> None:
        """Write ``self.configs`` to ``configs.json`` in the experiment root."""
        self._save_json(os.path.join(self.path, "configs.json"), self.configs)

    def _load_configs(self) -> dict:
        """Read ``configs.json`` from the experiment root."""
        return self._load_json(os.path.join(self.path, "configs.json"))

    def _build_dirs(self) -> None:
        """Compute and create the directories of the current snapshot."""
        self.dirs = {
            "models": os.path.join(self.path, str(self.idx), "models"),
            "data": os.path.join(self.path, str(self.idx), "data"),
            "plots": os.path.join(self.path, str(self.idx), "plots"),
            "best": os.path.join(self.path, "best"),
        }

        for dir_path in self.dirs.values():
            self._soft_mkdir(dir_path)

    def __init__(
        self,
        path: str,
        configs: dict,
        path_prefix: str = LOG_PATH_PREFIX,
        keep_last: int = 2,
        force: bool = False,
    ) -> None:
        """Open the experiment at ``./<path_prefix>/<path>`` or create it.

        Args:
            path: Experiment name (directory under ``path_prefix``).
            configs: Hyperparameters of the current run.
            path_prefix: Root directory of all experiments.
            keep_last: Number of most recent snapshots to keep when pruning;
                ``-1`` keeps every snapshot.
            force: Delete an existing experiment directory and start over.

        Raises:
            ValueError: If the experiment exists and its stored configs differ
                from ``configs`` (the ``COMMENTS`` entry is ignored).
        """
        self.keep_last = keep_last
        self.configs = configs
        self.path = os.path.join(".", path_prefix, path)

        if force:
            self._soft_rmdir(os.path.abspath(self.path))

        if os.path.exists(self.path):
            self.new = False
            self.load_state()
        else:
            self.new = True
            self.init_state()

    def init_state(self) -> None:
        """Initialize a fresh experiment: snapshot 0, configs and empty best."""
        self.idx = 0
        self._build_dirs()
        self._save_configs()
        self.save_best_dict({})

    def load_state(self) -> None:
        """Resume an existing experiment at its highest snapshot index.

        Raises:
            ValueError: If the stored configs differ from ``self.configs``
                in anything other than ``COMMENTS``.
        """
        # Compare copies so self.configs is never left without "COMMENTS",
        # neither on success nor when the check fails.
        stored = {k: v for k, v in self._load_configs().items() if k != "COMMENTS"}
        current = {k: v for k, v in self.configs.items() if k != "COMMENTS"}
        if stored != current:
            raise ValueError("Configs are not the same!")

        self.idx = self._get_max_index(self.path)
        self._build_dirs()

    def update_idx(self, inc_value: int):
        """Advance to a new snapshot index, create its dirs and prune old ones."""
        self.new = False
        self.idx += inc_value
        self._build_dirs()
        self._remove_old()

    def load(self, name: str, entity: str, *data):
        """Load an artifact of the current snapshot.

        Args:
            name: File name (``"model"``) or JSON file stem (``"data"``).
            entity: ``"model"`` restores the state dicts in place into the
                ``(model, optimizer)`` passed via ``data``; ``"data"`` returns
                the parsed JSON. Any other value is ignored.
            *data: ``model, optimizer`` when ``entity == "model"``.

        Returns:
            The loaded JSON object for ``"data"``, otherwise ``None``.
        """
        if entity == "model":
            checkpoint = torch.load(os.path.join(self.dirs["models"], name))
            model, optimizer = data
            model.load_state_dict(checkpoint["model_state_dict"])
            optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
            return None

        if entity == "data":
            return self._load_json(os.path.join(self.dirs["data"], f"{name}.json"))

        return None

    def save(self, name: str, entity: LoggerEntity, data, save_subplots: bool = False):
        """Persist an artifact.

        Args:
            name: File name, JSON file stem or plot file-name prefix,
                depending on ``entity``; unused for ``"best"``.
            entity: What to save:
                ``"model"`` - ``data`` is ``(model, optimizer)``;
                ``"data"`` - ``data`` is a JSON-serializable object;
                ``"plots"`` - ``data`` is the ``data_dict`` for ``get_plots``;
                ``"best"`` - ``data`` is
                ``((model_state, optimizer_state), best_dict)``.
            save_subplots: For ``"plots"``, additionally save every plot of
                ``log_plots`` as its own image.
        """
        if entity == "model":
            model, optimizer = data
            torch.save(
                {
                    "model_state_dict": model.state_dict(),
                    "optimizer_state_dict": optimizer.state_dict(),
                },
                os.path.join(self.dirs["models"], name),
            )
            return
        if entity == "data":
            self._save_json(os.path.join(self.dirs["data"], f"{name}.json"), data)
            return

        if entity == "plots":
            plots_data = data
            if save_subplots:
                for plot_spec in self.log_plots:
                    x_axis, y_axis = plot_spec
                    fig = get_plots(plots_data, [plot_spec], row_plots=1)
                    plot_name = (
                        f"{y_axis['axis_name']} over {x_axis['axis_name']}".replace(
                            " ", "_"
                        ).lower()
                    )
                    fig.savefig(
                        os.path.join(self.dirs["plots"], f"{name}{plot_name}.png")
                    )
                    # Close exactly this figure. A bare plt.close() followed by
                    # plt.clf() re-created an empty figure on every call.
                    plt.close(fig)

            fig = get_plots(plots_data, self.log_plots, row_plots=2)
            fig.savefig(os.path.join(self.dirs["plots"], f"{name}all.png"))
            plt.close(fig)
            return
        if entity == "best":
            (best_model, best_optimizer), best_dict = data
            best_path = os.path.abspath(self.dirs["best"])
            # best_dict holds a single {epoch: loss} entry; JSON-loaded keys
            # are strings, hence the int() conversion.
            new_best = int(next(iter(best_dict.keys())))

            old_best = -1
            for item in os.listdir(best_path):
                # Only non-numeric entry names are skipped; suppressing
                # BaseException would also swallow KeyboardInterrupt.
                with contextlib.suppress(ValueError):
                    old_best = int(item)

            # The stored best already belongs to this epoch - nothing to do.
            if new_best == old_best:
                return

            self._soft_rmdir(os.path.join(best_path, str(old_best)))

            path_prefix = os.path.join(best_path, str(new_best))
            self._soft_mkdir(path_prefix)
            torch.save(
                {
                    "model_state_dict": best_model,
                    "optimizer_state_dict": best_optimizer,
                },
                os.path.join(path_prefix, "net"),
            )
            return

    def save_best_dict(self, best_dict: dict) -> None:
        """Write ``best_dict`` to ``best.json`` in the experiment root."""
        self._save_json(os.path.join(self.path, "best.json"), best_dict)

    def load_best_dict(self) -> dict:
        """Read ``best.json`` from the experiment root."""
        return self._load_json(os.path.join(self.path, "best.json"))

    @staticmethod
    def update_best_dict(best_dict: dict, loss, epoch: int) -> tuple[dict, bool]:
        """Return the best-so-far record after observing ``loss`` at ``epoch``.

        Args:
            best_dict: Current ``{epoch: loss}`` record, or ``{}`` if none.
            loss: Newly observed loss.
            epoch: Epoch at which ``loss`` was observed.

        Returns:
            ``(record, updated)``: the unchanged ``best_dict`` and ``False``
            when ``loss`` is not strictly lower than the stored one, otherwise
            ``{epoch: loss}`` and ``True``.
        """
        if best_dict != {} and loss >= float(next(iter(best_dict.values()))):
            return best_dict, False

        return {epoch: loss}, True

## Dataset

In [10]:
# Tokenizer and GloVe vectors shared by the pipelines below.
tokenizer = get_tokenizer("basic_english")
global_vectors = GloVe(dim=GLOVE_DIM, cache="../data")


def tokenized_pipeline(x):
    """Return the GloVe vectors for an already tokenized sequence ``x``."""
    vectors = global_vectors.get_vecs_by_tokens(x, lower_case_backup=True)
    return vectors


def text_pipeline(x):
    """Tokenize the raw text ``x`` and return the GloVe vectors of its tokens."""
    tokens = tokenizer(x)
    return global_vectors.get_vecs_by_tokens(tokens, lower_case_backup=True)

In [11]:
def read_from_disk(path: str) -> np.ndarray:
    """Read the CSV file at ``path`` and return its rows as a NumPy array."""
    frame = pd.read_csv(path)
    return frame.to_numpy()

In [12]:
class PlagiarismDataset(Dataset):
    """Embedded (target, candidate, score) rows read from a CSV file.

    With ``CNN_DIM == 1`` every sample is one sequence: the tokens of the
    target followed by the tokens of the candidate, embedded with GloVe.
    Otherwise the two texts are embedded separately and stacked on an extra
    axis of size 2 (after the sequence axis).

    Attributes:
        texts: All samples, zero-padded along the sequence axis (axis 1).
        scores: The score of each row, in file order.
        max_length: Current size of the sequence axis of ``texts``.
    """

    def __init__(self, path: str):
        """Load and embed every row of the CSV file at ``path``."""
        data = read_from_disk(path)
        texts, self.scores = [], []

        for target, candidate, score in data:
            if CNN_DIM == 1:
                texts.append(tokenized_pipeline(tokenizer(target) + tokenizer(candidate)))
            else:
                # pad_sequence brings both texts to a common length and puts
                # the pair axis second: (length, 2, embedding).
                texts.append(
                    nn.utils.rnn.pad_sequence(
                        [text_pipeline(target), text_pipeline(candidate)]
                    )
                )
            self.scores.append(score)

        self.texts = nn.utils.rnn.pad_sequence(texts, batch_first=True)
        self.max_length = self.texts.shape[1]

    def __len__(self):
        return len(self.scores)

    def __getitem__(self, idx) -> tuple[torch.Tensor, float]:
        return (
            self.texts[idx],
            self.scores[idx],
        )

    def pad(self, pad_size: int):
        """Zero-pad the sequence axis of ``texts`` to ``pad_size`` entries.

        ``max_length`` is updated afterwards, so calling ``pad`` again with
        the same size is a no-op instead of padding a second time. As with
        ``F.pad``, a ``pad_size`` below the current length trims the end.
        """
        extra = pad_size - self.max_length
        if extra == 0:
            return

        # F.pad lists (before, after) pairs starting from the LAST axis; the
        # sequence axis is second-to-last in 1D mode and third-to-last in 2D.
        if CNN_DIM == 1:
            padding = (0, 0, 0, extra)
        else:
            padding = (0, 0, 0, 0, 0, extra)
        self.texts = F.pad(self.texts, padding)
        self.max_length = self.texts.shape[1]

In [13]:
# Load and embed both splits; TRAIN_SIZE / TEST_SIZE select the CSV files.
train_dataset = PlagiarismDataset(f"../generated/datasets/train_{TRAIN_SIZE}.csv")
test_dataset = PlagiarismDataset(f"../generated/datasets/test_{TEST_SIZE}.csv")

In [14]:
# Common sequence length: the longer of the two splits' padded lengths.
# The CNN's fully connected layer is sized from this value.
MAX_LEN = max(train_dataset.max_length, test_dataset.max_length)
MAX_LEN

263

In [15]:
# Zero-pad both splits to MAX_LEN so every sample has the same sequence length.
train_dataset.pad(MAX_LEN)
test_dataset.pad(MAX_LEN)

In [16]:
def collate_batch(batch):
    """Stack a list of ``(text, score)`` samples into tensors on ``DEVICE``.

    Returns:
        A pair ``(texts, scores)``: the stacked text tensors and a float
        tensor with the scores, both moved to ``DEVICE``.
    """
    text_list = [sample_text for sample_text, _ in batch]
    score_list = [sample_score for _, sample_score in batch]
    texts = torch.stack(text_list).to(DEVICE)
    scores = torch.FloatTensor(score_list).to(DEVICE)
    return texts, scores


# NOTE(review): DataLoader shuffling presumably draws from torch's RNG rather
# than NumPy's, so this seed may not make the batch order reproducible — confirm.
np.random.seed(420)
# Training batches are reshuffled; test batches keep the file order.
train_dataloader = DataLoader(
    train_dataset, collate_fn=collate_batch, batch_size=BATCH_SIZE, shuffle=True
)
test_dataloader = DataLoader(
    test_dataset, collate_fn=collate_batch, batch_size=BATCH_SIZE, shuffle=False
)

## CNN

In [17]:
class CNN(nn.Module):
    """Convolutional regressor producing a score in ``(0, 1)`` per sample.

    A single convolution + ReLU + max-pooling stage (1D or 2D, selected by
    ``CNN_DIM``) is followed by a two-layer fully connected head ending in a
    sigmoid.

    Expected input (derived from ``forward`` and the layer sizes):
        * ``CNN_DIM == 1``: ``(batch, MAX_LEN, EMBED_DIM)``
        * otherwise: ``(batch, MAX_LEN, 2, EMBED_DIM)``

    Attributes:
        conv_out_dim: Number of activations after the convolution.
        cnn_out_dim: Number of activations after pooling, i.e. the input size
            of the fully connected head.
    """

    def __init__(self, output_dim: int = 1, hidden_dim: int = 128) -> None:
        """Build the network.

        Args:
            output_dim: Number of outputs per sample.
            hidden_dim: Width of the hidden fully connected layer.
        """
        super().__init__()

        # Sequence length after the unpadded, stride-1 convolution.
        conv_len = MAX_LEN - KERNEL_SIZE + 1
        # Max-pooling runs along the sequence axis of EACH channel, so the
        # pooled length must be computed per channel and then multiplied by
        # the channel count. Pooling the flattened size instead is off by one
        # whenever conv_len is odd.
        pooled_len = (conv_len - MAX_POOL_KERNEL) // MAX_POOL_STRIDE + 1

        self.conv_out_dim = conv_len * OUT_CHANNELS
        self.cnn_out_dim = pooled_len * OUT_CHANNELS

        if CNN_DIM == 1:
            self.cnn = nn.Sequential(
                nn.Conv1d(
                    in_channels=EMBED_DIM,
                    out_channels=OUT_CHANNELS,
                    kernel_size=KERNEL_SIZE,
                ),
                nn.ReLU(),
                nn.MaxPool1d(MAX_POOL_KERNEL, stride=MAX_POOL_STRIDE),
            )
        else:
            self.cnn = nn.Sequential(
                # The kernel spans both texts (height 2), so the height axis
                # collapses to size 1 and is merged away by Flatten below.
                nn.Conv2d(
                    in_channels=EMBED_DIM,
                    out_channels=OUT_CHANNELS,
                    kernel_size=(2, KERNEL_SIZE),
                ),
                nn.ReLU(),
                nn.Flatten(start_dim=1, end_dim=2),
                nn.MaxPool1d(MAX_POOL_KERNEL, stride=MAX_POOL_STRIDE),
            )

        self.cnet = nn.Sequential(
            nn.Linear(self.cnn_out_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(DROPOUT),
            nn.Linear(hidden_dim, output_dim),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Score a batch; returns shape ``(batch,)`` when ``output_dim == 1``."""
        # Convolutions expect channels (the embedding axis) right after batch.
        x = torch.swapaxes(x, 1, 2) if CNN_DIM == 1 else torch.swapaxes(x, 1, 3)

        x = self.cnn(x)
        x = torch.flatten(x, 1)
        # Drop only the trailing output axis: a bare squeeze() would also
        # remove the batch axis for a batch of one.
        return self.cnet(x).squeeze(-1)

## Evaluation

In [18]:
@torch.no_grad
def evaluate(
    net: nn.Module,
    data: DataLoader,
) -> float:  # mean loss
    """Compute the mean per-batch MSE of ``net`` over ``data``.

    The network is switched to eval mode and left in it. The result is the
    sum of the batch losses divided by the number of batches.
    """
    net.eval()

    loss_sum: float = 0.0
    progress = tqdm(data, total=len(data), desc="Evaluation")

    for texts, scores in progress:
        loss_sum += F.mse_loss(net(texts), scores).item()

    mean_loss = loss_sum / len(data)
    progress.set_postfix({"Mean Test Loss": mean_loss})

    return mean_loss

## Training

In [19]:
# load_from_experiments(...)
# Opens the experiment directory if it exists (resuming it), otherwise creates it.
LOGGER = Logger(EXPERIMENT_NAME, pack_hyperparameters())

In [20]:
# Seed the RNGs only for a fresh experiment; a resumed one gets its weights
# from the stored checkpoint further down.
if LOGGER.new:
    np.random.seed(42)
    torch.manual_seed(42)

net = CNN().to(DEVICE)
optimizer = optim.Adam(net.parameters(), lr=LR)

In [21]:
def moving_average(x, w=10):
    """Smooth ``x`` with a Savitzky-Golay filter.

    The window is ``w`` capped at a tenth of ``len(x)`` (but at least 1), and
    the polynomial order is 3 capped at ``window - 1``.
    """
    window = min(w, max(1, len(x) // 10))
    polyorder = min(3, window - 1)
    return scipy.signal.savgol_filter(x, window, polyorder)


# Reference loss level: the MSE obtained by always predicting 0.5 against the
# targets 0, 0.5 and 1.
minimizing_constant = F.mse_loss(
    torch.tensor([0, 0.5, 1]), torch.tensor([0.5, 0.5, 0.5])
).item()
print(f"{minimizing_constant=}")

minimizing_constant=0.1666666716337204


In [22]:
# Series collected during training; the keys match the Logger plot specs.
log_data = {
    key: []
    for key in (
        "Mean Loss",
        "Mean Test Loss",
        "Eval Epoch",
        "Eval Time",
        "Epoch",
        "Time",
    )
}

best_dict = {}

if LOGGER.new:
    # Fresh experiment: record the untrained network as the epoch-0 baseline.
    mean_test_loss = evaluate(net, test_dataloader)
    log_data["Mean Test Loss"].append(mean_test_loss)
    log_data["Eval Epoch"].append(0)
    log_data["Eval Time"].append(0)

    # Checkpoint of the initial model and optimizer.
    LOGGER.save("net", "model", (net, optimizer))

    # The first evaluation is the best one seen so far.
    best_dict, updated = Logger.update_best_dict(best_dict, mean_test_loss, 0)
    if updated:
        best_states = (net.state_dict(), optimizer.state_dict())

    LOGGER.save_best_dict(best_dict)
    LOGGER.save("", "best", (best_states, best_dict))

    # Collected series.
    LOGGER.save("log_data", "data", log_data)
else:
    # Resumed experiment: restore the best record, the series and the weights.
    best_dict: dict = LOGGER.load_best_dict()
    log_data: dict = LOGGER.load("log_data", "data")
    LOGGER.load("net", "model", net, optimizer)

Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

Evaluation:   7%|▋         | 1/15 [00:05<01:11,  5.14s/it]

Evaluation: 100%|██████████| 15/15 [00:05<00:00,  2.89it/s]




In [23]:
# Wall-clock timing has to wait for queued GPU work, but synchronizing is only
# valid when running on CUDA; DEVICE may be the CPU fallback.
if DEVICE.type == "cuda":
    torch.cuda.synchronize()
start_time = time.time()
# When resuming, continue the time and epoch axes where the loaded log stopped.
time_shift = log_data["Time"][-1] if log_data["Time"] else 0
epoch_shift = log_data["Epoch"][-1] if log_data["Epoch"] else 0

# state_dict() hands out references to the live tensors, so a snapshot must be
# deep-copied; otherwise it silently follows every later optimizer step.
best_states = copy.deepcopy((net.state_dict(), optimizer.state_dict()))

net.train()
epoch_loop = tqdm(range(1 + epoch_shift, 1 + EPOCHS))

for i in epoch_loop:
    # evaluate() leaves the network in eval mode, so re-enable training mode.
    net.train()
    optimizer.zero_grad()

    # NOTE: each iteration trains on a single batch drawn from a freshly
    # created (reshuffled) loader iterator, not on a full pass over the data.
    batch = next(iter(train_dataloader))
    texts, scores = batch

    predictions = net(texts)

    loss = F.mse_loss(predictions, scores)

    loss.backward()

    if CLIP_GRAD > 0:
        nn.utils.clip_grad_norm_(net.parameters(), CLIP_GRAD)
    optimizer.step()

    # logging
    if DEVICE.type == "cuda":
        torch.cuda.synchronize()
    elapsed_time = time.time() - start_time + time_shift

    log_data["Epoch"].append(i)
    log_data["Time"].append(elapsed_time)
    log_data["Mean Loss"].append(loss.item())

    epoch_loop.set_postfix(
        {
            "Mean Loss": log_data["Mean Loss"][-1],
        }
    )

    # evaluate
    if i % EVAL_PERIOD == 0 or i == EPOCHS:
        mean_test_loss = evaluate(net, test_dataloader)

        # logging
        log_data["Mean Test Loss"].append(mean_test_loss)
        log_data["Eval Epoch"].append(i)
        log_data["Eval Time"].append(elapsed_time)

        best_dict, updated = Logger.update_best_dict(best_dict, mean_test_loss, i)
        if updated:
            # Freeze the weights of the new best evaluation (see note above).
            best_states = copy.deepcopy((net.state_dict(), optimizer.state_dict()))

    # log
    if i % LOG_PERIOD == 0 or i == EPOCHS:
        LOGGER.update_idx(1)

        ## models
        LOGGER.save("net", "model", (net, optimizer))

        ## data
        LOGGER.save("log_data", "data", log_data)

        ## best
        LOGGER.save_best_dict(best_dict)
        LOGGER.save("", "best", (best_states, best_dict))

        ## plots
        LOGGER.save(
            "",
            "plots",
            {
                "Epoch": log_data["Epoch"],
                "Time": log_data["Time"],
                "Eval Epoch": log_data["Eval Epoch"],
                "Eval Time": log_data["Eval Time"],
                "Mean Loss": moving_average(log_data["Mean Loss"]),
                "Mean Test Loss": log_data["Mean Test Loss"],
                "Minimizing constant": minimizing_constant,
            },
        )

epoch_loop.close()

  0%|          | 0/5500 [00:00<?, ?it/s]

  0%|          | 0/5500 [00:02<?, ?it/s, Mean Loss=0.182]

  0%|          | 1/5500 [00:12<18:33:05, 12.14s/it, Mean Loss=0.182]

  0%|          | 1/5500 [00:12<18:33:05, 12.14s/it, Mean Loss=0.144]

  0%|          | 2/5500 [00:12<8:08:01,  5.33s/it, Mean Loss=0.144] 

  0%|          | 2/5500 [00:12<8:08:01,  5.33s/it, Mean Loss=0.21] 

  0%|          | 3/5500 [00:13<4:45:48,  3.12s/it, Mean Loss=0.21]

  0%|          | 3/5500 [00:13<4:45:48,  3.12s/it, Mean Loss=0.171]

  0%|          | 4/5500 [00:13<3:12:23,  2.10s/it, Mean Loss=0.171]

  0%|          | 4/5500 [00:13<3:12:23,  2.10s/it, Mean Loss=0.106]

  0%|          | 5/5500 [00:14<2:20:27,  1.53s/it, Mean Loss=0.106]

  0%|          | 5/5500 [00:14<2:20:27,  1.53s/it, Mean Loss=0.125]

  0%|          | 6/5500 [00:14<1:51:57,  1.22s/it, Mean Loss=0.125]

  0%|          | 6/5500 [00:14<1:51:57,  1.22s/it, Mean Loss=0.179]

  0%|          | 7/5500 [00:15<1:30:48,  1.01it/s, Mean Loss=0.179]

  0%|          | 7/5500 [00:15<1:30:48,  1.01it/s, Mean Loss=0.202]

  0%|          | 8/5500 [00:15<1:16:31,  1.20it/s, Mean Loss=0.202]

  0%|          | 8/5500 [00:15<1:16:31,  1.20it/s, Mean Loss=0.174]

  0%|          | 9/5500 [00:16<1:07:46,  1.35it/s, Mean Loss=0.174]

  0%|          | 9/5500 [00:16<1:07:46,  1.35it/s, Mean Loss=0.174]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 180.42it/s]




  0%|          | 10/5500 [00:17<1:04:15,  1.42it/s, Mean Loss=0.174]

  0%|          | 10/5500 [00:17<1:04:15,  1.42it/s, Mean Loss=0.176]

  0%|          | 11/5500 [00:17<59:27,  1.54it/s, Mean Loss=0.176]  

  0%|          | 11/5500 [00:17<59:27,  1.54it/s, Mean Loss=0.175]

  0%|          | 12/5500 [00:18<56:14,  1.63it/s, Mean Loss=0.175]

  0%|          | 12/5500 [00:18<56:14,  1.63it/s, Mean Loss=0.2]  

  0%|          | 13/5500 [00:18<53:58,  1.69it/s, Mean Loss=0.2]

  0%|          | 13/5500 [00:18<53:58,  1.69it/s, Mean Loss=0.177]

  0%|          | 14/5500 [00:19<56:59,  1.60it/s, Mean Loss=0.177]

  0%|          | 14/5500 [00:19<56:59,  1.60it/s, Mean Loss=0.175]

  0%|          | 15/5500 [00:19<55:22,  1.65it/s, Mean Loss=0.175]

  0%|          | 15/5500 [00:19<55:22,  1.65it/s, Mean Loss=0.176]

  0%|          | 16/5500 [00:20<53:53,  1.70it/s, Mean Loss=0.176]

  0%|          | 16/5500 [00:20<53:53,  1.70it/s, Mean Loss=0.173]

  0%|          | 17/5500 [00:20<52:16,  1.75it/s, Mean Loss=0.173]

  0%|          | 17/5500 [00:20<52:16,  1.75it/s, Mean Loss=0.149]

  0%|          | 18/5500 [00:21<52:08,  1.75it/s, Mean Loss=0.149]

  0%|          | 18/5500 [00:21<52:08,  1.75it/s, Mean Loss=0.127]

  0%|          | 19/5500 [00:22<51:06,  1.79it/s, Mean Loss=0.127]

  0%|          | 19/5500 [00:22<51:06,  1.79it/s, Mean Loss=0.228]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 195.08it/s]




  0%|          | 20/5500 [00:22<55:24,  1.65it/s, Mean Loss=0.228]

  0%|          | 20/5500 [00:22<55:24,  1.65it/s, Mean Loss=0.175]

  0%|          | 21/5500 [00:23<52:28,  1.74it/s, Mean Loss=0.175]

  0%|          | 21/5500 [00:23<52:28,  1.74it/s, Mean Loss=0.15] 

  0%|          | 22/5500 [00:23<51:19,  1.78it/s, Mean Loss=0.15]

  0%|          | 22/5500 [00:23<51:19,  1.78it/s, Mean Loss=0.173]

  0%|          | 23/5500 [00:24<49:36,  1.84it/s, Mean Loss=0.173]

  0%|          | 23/5500 [00:24<49:36,  1.84it/s, Mean Loss=0.151]

  0%|          | 24/5500 [00:25<53:25,  1.71it/s, Mean Loss=0.151]

  0%|          | 24/5500 [00:25<53:25,  1.71it/s, Mean Loss=0.1]  

  0%|          | 25/5500 [00:25<51:35,  1.77it/s, Mean Loss=0.1]

  0%|          | 25/5500 [00:25<51:35,  1.77it/s, Mean Loss=0.148]

  0%|          | 26/5500 [00:26<49:15,  1.85it/s, Mean Loss=0.148]

  0%|          | 26/5500 [00:26<49:15,  1.85it/s, Mean Loss=0.128]

  0%|          | 27/5500 [00:26<49:31,  1.84it/s, Mean Loss=0.128]

  0%|          | 27/5500 [00:26<49:31,  1.84it/s, Mean Loss=0.151]

  1%|          | 28/5500 [00:27<48:50,  1.87it/s, Mean Loss=0.151]

  1%|          | 28/5500 [00:27<48:50,  1.87it/s, Mean Loss=0.0982]

  1%|          | 29/5500 [00:27<49:43,  1.83it/s, Mean Loss=0.0982]

  1%|          | 29/5500 [00:27<49:43,  1.83it/s, Mean Loss=0.202] 




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 184.73it/s]




  1%|          | 30/5500 [00:28<53:35,  1.70it/s, Mean Loss=0.202]

  1%|          | 30/5500 [00:28<53:35,  1.70it/s, Mean Loss=0.173]

  1%|          | 31/5500 [00:28<51:46,  1.76it/s, Mean Loss=0.173]

  1%|          | 31/5500 [00:28<51:46,  1.76it/s, Mean Loss=0.175]

  1%|          | 32/5500 [00:29<51:25,  1.77it/s, Mean Loss=0.175]

  1%|          | 32/5500 [00:29<51:25,  1.77it/s, Mean Loss=0.151]

  1%|          | 33/5500 [00:29<50:57,  1.79it/s, Mean Loss=0.151]

  1%|          | 33/5500 [00:29<50:57,  1.79it/s, Mean Loss=0.176]

  1%|          | 34/5500 [00:30<49:14,  1.85it/s, Mean Loss=0.176]

  1%|          | 34/5500 [00:30<49:14,  1.85it/s, Mean Loss=0.201]

  1%|          | 35/5500 [00:31<50:30,  1.80it/s, Mean Loss=0.201]

  1%|          | 35/5500 [00:31<50:30,  1.80it/s, Mean Loss=0.122]

  1%|          | 36/5500 [00:32<1:01:17,  1.49it/s, Mean Loss=0.122]

  1%|          | 36/5500 [00:32<1:01:17,  1.49it/s, Mean Loss=0.175]

  1%|          | 37/5500 [00:32<57:08,  1.59it/s, Mean Loss=0.175]  

  1%|          | 37/5500 [00:32<57:08,  1.59it/s, Mean Loss=0.127]

  1%|          | 38/5500 [00:33<54:54,  1.66it/s, Mean Loss=0.127]

  1%|          | 38/5500 [00:33<54:54,  1.66it/s, Mean Loss=0.151]

  1%|          | 39/5500 [00:33<52:52,  1.72it/s, Mean Loss=0.151]

  1%|          | 39/5500 [00:33<52:52,  1.72it/s, Mean Loss=0.204]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 172.24it/s]




  1%|          | 40/5500 [00:34<54:20,  1.67it/s, Mean Loss=0.204]

  1%|          | 40/5500 [00:34<54:20,  1.67it/s, Mean Loss=0.124]

  1%|          | 41/5500 [00:34<52:09,  1.74it/s, Mean Loss=0.124]

  1%|          | 41/5500 [00:34<52:09,  1.74it/s, Mean Loss=0.0998]

  1%|          | 42/5500 [00:35<51:55,  1.75it/s, Mean Loss=0.0998]

  1%|          | 42/5500 [00:35<51:55,  1.75it/s, Mean Loss=0.174] 

  1%|          | 43/5500 [00:35<51:48,  1.76it/s, Mean Loss=0.174]

  1%|          | 43/5500 [00:35<51:48,  1.76it/s, Mean Loss=0.123]

  1%|          | 44/5500 [00:36<52:11,  1.74it/s, Mean Loss=0.123]

  1%|          | 44/5500 [00:36<52:11,  1.74it/s, Mean Loss=0.198]

  1%|          | 45/5500 [00:37<51:33,  1.76it/s, Mean Loss=0.198]

  1%|          | 45/5500 [00:37<51:33,  1.76it/s, Mean Loss=0.145]

  1%|          | 46/5500 [00:37<51:00,  1.78it/s, Mean Loss=0.145]

  1%|          | 46/5500 [00:37<51:00,  1.78it/s, Mean Loss=0.15] 

  1%|          | 47/5500 [00:38<49:47,  1.83it/s, Mean Loss=0.15]

  1%|          | 47/5500 [00:38<49:47,  1.83it/s, Mean Loss=0.193]

  1%|          | 48/5500 [00:38<49:47,  1.82it/s, Mean Loss=0.193]

  1%|          | 48/5500 [00:38<49:47,  1.82it/s, Mean Loss=0.202]

  1%|          | 49/5500 [00:39<51:06,  1.78it/s, Mean Loss=0.202]

  1%|          | 49/5500 [00:39<51:06,  1.78it/s, Mean Loss=0.102]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 179.90it/s]




  1%|          | 50/5500 [00:39<54:27,  1.67it/s, Mean Loss=0.102]

  1%|          | 50/5500 [00:39<54:27,  1.67it/s, Mean Loss=0.175]

  1%|          | 51/5500 [00:40<1:00:25,  1.50it/s, Mean Loss=0.175]

  1%|          | 51/5500 [00:40<1:00:25,  1.50it/s, Mean Loss=0.151]

  1%|          | 52/5500 [00:41<58:11,  1.56it/s, Mean Loss=0.151]  

  1%|          | 52/5500 [00:41<58:11,  1.56it/s, Mean Loss=0.176]

  1%|          | 53/5500 [00:41<54:45,  1.66it/s, Mean Loss=0.176]

  1%|          | 53/5500 [00:41<54:45,  1.66it/s, Mean Loss=0.22] 

  1%|          | 54/5500 [00:42<52:23,  1.73it/s, Mean Loss=0.22]

  1%|          | 54/5500 [00:42<52:23,  1.73it/s, Mean Loss=0.153]

  1%|          | 55/5500 [00:42<53:00,  1.71it/s, Mean Loss=0.153]

  1%|          | 55/5500 [00:42<53:00,  1.71it/s, Mean Loss=0.117]

  1%|          | 56/5500 [00:43<52:08,  1.74it/s, Mean Loss=0.117]

  1%|          | 56/5500 [00:43<52:08,  1.74it/s, Mean Loss=0.151]

  1%|          | 57/5500 [00:44<50:00,  1.81it/s, Mean Loss=0.151]

  1%|          | 57/5500 [00:44<50:00,  1.81it/s, Mean Loss=0.174]

  1%|          | 58/5500 [00:44<49:03,  1.85it/s, Mean Loss=0.174]

  1%|          | 58/5500 [00:44<49:03,  1.85it/s, Mean Loss=0.128]

  1%|          | 59/5500 [00:45<55:39,  1.63it/s, Mean Loss=0.128]

  1%|          | 59/5500 [00:45<55:39,  1.63it/s, Mean Loss=0.152]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 165.67it/s]




  1%|          | 60/5500 [00:46<58:31,  1.55it/s, Mean Loss=0.152]

  1%|          | 60/5500 [00:46<58:31,  1.55it/s, Mean Loss=0.149]

  1%|          | 61/5500 [00:46<56:48,  1.60it/s, Mean Loss=0.149]

  1%|          | 61/5500 [00:46<56:48,  1.60it/s, Mean Loss=0.168]

  1%|          | 62/5500 [00:47<56:24,  1.61it/s, Mean Loss=0.168]

  1%|          | 62/5500 [00:47<56:24,  1.61it/s, Mean Loss=0.126]

  1%|          | 63/5500 [00:47<53:31,  1.69it/s, Mean Loss=0.126]

  1%|          | 63/5500 [00:47<53:31,  1.69it/s, Mean Loss=0.153]

  1%|          | 64/5500 [00:48<51:57,  1.74it/s, Mean Loss=0.153]

  1%|          | 64/5500 [00:48<51:57,  1.74it/s, Mean Loss=0.196]

  1%|          | 65/5500 [00:48<51:54,  1.75it/s, Mean Loss=0.196]

  1%|          | 65/5500 [00:48<51:54,  1.75it/s, Mean Loss=0.104]

  1%|          | 66/5500 [00:49<51:31,  1.76it/s, Mean Loss=0.104]

  1%|          | 66/5500 [00:49<51:31,  1.76it/s, Mean Loss=0.189]

  1%|          | 67/5500 [00:49<50:27,  1.79it/s, Mean Loss=0.189]

  1%|          | 67/5500 [00:49<50:27,  1.79it/s, Mean Loss=0.0779]

  1%|          | 68/5500 [00:50<48:57,  1.85it/s, Mean Loss=0.0779]

  1%|          | 68/5500 [00:50<48:57,  1.85it/s, Mean Loss=0.149] 

  1%|▏         | 69/5500 [00:51<57:51,  1.56it/s, Mean Loss=0.149]

  1%|▏         | 69/5500 [00:51<57:51,  1.56it/s, Mean Loss=0.123]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 172.69it/s]




  1%|▏         | 70/5500 [00:51<57:14,  1.58it/s, Mean Loss=0.123]

  1%|▏         | 70/5500 [00:51<57:14,  1.58it/s, Mean Loss=0.18] 

  1%|▏         | 71/5500 [00:52<53:36,  1.69it/s, Mean Loss=0.18]

  1%|▏         | 71/5500 [00:52<53:36,  1.69it/s, Mean Loss=0.127]

  1%|▏         | 72/5500 [00:52<52:53,  1.71it/s, Mean Loss=0.127]

  1%|▏         | 72/5500 [00:53<52:53,  1.71it/s, Mean Loss=0.204]

  1%|▏         | 73/5500 [00:53<51:47,  1.75it/s, Mean Loss=0.204]

  1%|▏         | 73/5500 [00:53<51:47,  1.75it/s, Mean Loss=0.164]

  1%|▏         | 74/5500 [00:54<50:44,  1.78it/s, Mean Loss=0.164]

  1%|▏         | 74/5500 [00:54<50:44,  1.78it/s, Mean Loss=0.146]

  1%|▏         | 75/5500 [00:54<49:07,  1.84it/s, Mean Loss=0.146]

  1%|▏         | 75/5500 [00:54<49:07,  1.84it/s, Mean Loss=0.173]

  1%|▏         | 76/5500 [00:55<48:48,  1.85it/s, Mean Loss=0.173]

  1%|▏         | 76/5500 [00:55<48:48,  1.85it/s, Mean Loss=0.122]

  1%|▏         | 77/5500 [00:55<48:40,  1.86it/s, Mean Loss=0.122]

  1%|▏         | 77/5500 [00:55<48:40,  1.86it/s, Mean Loss=0.16] 

  1%|▏         | 78/5500 [00:56<48:55,  1.85it/s, Mean Loss=0.16]

  1%|▏         | 78/5500 [00:56<48:55,  1.85it/s, Mean Loss=0.246]

  1%|▏         | 79/5500 [00:56<48:36,  1.86it/s, Mean Loss=0.246]

  1%|▏         | 79/5500 [00:56<48:36,  1.86it/s, Mean Loss=0.146]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 192.26it/s]




  1%|▏         | 80/5500 [00:57<51:17,  1.76it/s, Mean Loss=0.146]

  1%|▏         | 80/5500 [00:57<51:17,  1.76it/s, Mean Loss=0.0724]

  1%|▏         | 81/5500 [00:57<49:50,  1.81it/s, Mean Loss=0.0724]

  1%|▏         | 81/5500 [00:57<49:50,  1.81it/s, Mean Loss=0.161] 

  1%|▏         | 82/5500 [00:58<49:45,  1.82it/s, Mean Loss=0.161]

  1%|▏         | 82/5500 [00:58<49:45,  1.82it/s, Mean Loss=0.149]

  2%|▏         | 83/5500 [00:58<48:51,  1.85it/s, Mean Loss=0.149]

  2%|▏         | 83/5500 [00:58<48:51,  1.85it/s, Mean Loss=0.0781]

  2%|▏         | 84/5500 [00:59<49:32,  1.82it/s, Mean Loss=0.0781]

  2%|▏         | 84/5500 [00:59<49:32,  1.82it/s, Mean Loss=0.199] 

  2%|▏         | 85/5500 [01:00<49:59,  1.81it/s, Mean Loss=0.199]

  2%|▏         | 85/5500 [01:00<49:59,  1.81it/s, Mean Loss=0.0753]

  2%|▏         | 86/5500 [01:00<48:57,  1.84it/s, Mean Loss=0.0753]

  2%|▏         | 86/5500 [01:00<48:57,  1.84it/s, Mean Loss=0.191] 

  2%|▏         | 87/5500 [01:01<50:53,  1.77it/s, Mean Loss=0.191]

  2%|▏         | 87/5500 [01:01<50:53,  1.77it/s, Mean Loss=0.195]

  2%|▏         | 88/5500 [01:01<51:01,  1.77it/s, Mean Loss=0.195]

  2%|▏         | 88/5500 [01:01<51:01,  1.77it/s, Mean Loss=0.119]

  2%|▏         | 89/5500 [01:02<51:24,  1.75it/s, Mean Loss=0.119]

  2%|▏         | 89/5500 [01:02<51:24,  1.75it/s, Mean Loss=0.172]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 217.16it/s]




  2%|▏         | 90/5500 [01:03<55:04,  1.64it/s, Mean Loss=0.172]

  2%|▏         | 90/5500 [01:03<55:04,  1.64it/s, Mean Loss=0.102]

  2%|▏         | 91/5500 [01:03<1:02:46,  1.44it/s, Mean Loss=0.102]

  2%|▏         | 91/5500 [01:03<1:02:46,  1.44it/s, Mean Loss=0.189]

  2%|▏         | 92/5500 [01:04<58:26,  1.54it/s, Mean Loss=0.189]  

  2%|▏         | 92/5500 [01:04<58:26,  1.54it/s, Mean Loss=0.191]

  2%|▏         | 93/5500 [01:04<54:24,  1.66it/s, Mean Loss=0.191]

  2%|▏         | 93/5500 [01:04<54:24,  1.66it/s, Mean Loss=0.0714]

  2%|▏         | 94/5500 [01:05<52:02,  1.73it/s, Mean Loss=0.0714]

  2%|▏         | 94/5500 [01:05<52:02,  1.73it/s, Mean Loss=0.166] 

  2%|▏         | 95/5500 [01:06<51:40,  1.74it/s, Mean Loss=0.166]

  2%|▏         | 95/5500 [01:06<51:40,  1.74it/s, Mean Loss=0.204]

  2%|▏         | 96/5500 [01:06<51:32,  1.75it/s, Mean Loss=0.204]

  2%|▏         | 96/5500 [01:06<51:32,  1.75it/s, Mean Loss=0.156]

  2%|▏         | 97/5500 [01:07<52:17,  1.72it/s, Mean Loss=0.156]

  2%|▏         | 97/5500 [01:07<52:17,  1.72it/s, Mean Loss=0.168]

  2%|▏         | 98/5500 [01:07<51:54,  1.73it/s, Mean Loss=0.168]

  2%|▏         | 98/5500 [01:07<51:54,  1.73it/s, Mean Loss=0.206]

  2%|▏         | 99/5500 [01:08<51:36,  1.74it/s, Mean Loss=0.206]

  2%|▏         | 99/5500 [01:08<51:36,  1.74it/s, Mean Loss=0.157]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 171.87it/s]




  2%|▏         | 100/5500 [01:09<53:39,  1.68it/s, Mean Loss=0.157]

  2%|▏         | 100/5500 [01:09<53:39,  1.68it/s, Mean Loss=0.188]

  2%|▏         | 101/5500 [01:09<51:56,  1.73it/s, Mean Loss=0.188]

  2%|▏         | 101/5500 [01:09<51:56,  1.73it/s, Mean Loss=0.195]

  2%|▏         | 102/5500 [01:10<51:09,  1.76it/s, Mean Loss=0.195]

  2%|▏         | 102/5500 [01:10<51:09,  1.76it/s, Mean Loss=0.19] 

  2%|▏         | 103/5500 [01:10<51:02,  1.76it/s, Mean Loss=0.19]

  2%|▏         | 103/5500 [01:10<51:02,  1.76it/s, Mean Loss=0.153]

  2%|▏         | 104/5500 [01:11<50:05,  1.80it/s, Mean Loss=0.153]

  2%|▏         | 104/5500 [01:11<50:05,  1.80it/s, Mean Loss=0.149]

  2%|▏         | 105/5500 [01:11<50:18,  1.79it/s, Mean Loss=0.149]

  2%|▏         | 105/5500 [01:11<50:18,  1.79it/s, Mean Loss=0.197]

  2%|▏         | 106/5500 [01:12<50:05,  1.79it/s, Mean Loss=0.197]

  2%|▏         | 106/5500 [01:12<50:05,  1.79it/s, Mean Loss=0.163]

  2%|▏         | 107/5500 [01:12<49:37,  1.81it/s, Mean Loss=0.163]

  2%|▏         | 107/5500 [01:12<49:37,  1.81it/s, Mean Loss=0.179]

  2%|▏         | 108/5500 [01:13<49:18,  1.82it/s, Mean Loss=0.179]

  2%|▏         | 108/5500 [01:13<49:18,  1.82it/s, Mean Loss=0.0918]

  2%|▏         | 109/5500 [01:13<49:36,  1.81it/s, Mean Loss=0.0918]

  2%|▏         | 109/5500 [01:13<49:36,  1.81it/s, Mean Loss=0.214] 




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 166.13it/s]




  2%|▏         | 110/5500 [01:14<55:12,  1.63it/s, Mean Loss=0.214]

  2%|▏         | 110/5500 [01:14<55:12,  1.63it/s, Mean Loss=0.129]

  2%|▏         | 111/5500 [01:15<53:18,  1.69it/s, Mean Loss=0.129]

  2%|▏         | 111/5500 [01:15<53:18,  1.69it/s, Mean Loss=0.126]

  2%|▏         | 112/5500 [01:15<51:47,  1.73it/s, Mean Loss=0.126]

  2%|▏         | 112/5500 [01:15<51:47,  1.73it/s, Mean Loss=0.127]

  2%|▏         | 113/5500 [01:16<50:38,  1.77it/s, Mean Loss=0.127]

  2%|▏         | 113/5500 [01:16<50:38,  1.77it/s, Mean Loss=0.17] 

  2%|▏         | 114/5500 [01:16<50:11,  1.79it/s, Mean Loss=0.17]

  2%|▏         | 114/5500 [01:16<50:11,  1.79it/s, Mean Loss=0.124]

  2%|▏         | 115/5500 [01:17<49:28,  1.81it/s, Mean Loss=0.124]

  2%|▏         | 115/5500 [01:17<49:28,  1.81it/s, Mean Loss=0.172]

  2%|▏         | 116/5500 [01:17<49:02,  1.83it/s, Mean Loss=0.172]

  2%|▏         | 116/5500 [01:17<49:02,  1.83it/s, Mean Loss=0.116]

  2%|▏         | 117/5500 [01:18<50:00,  1.79it/s, Mean Loss=0.116]

  2%|▏         | 117/5500 [01:18<50:00,  1.79it/s, Mean Loss=0.196]

  2%|▏         | 118/5500 [01:19<49:44,  1.80it/s, Mean Loss=0.196]

  2%|▏         | 118/5500 [01:19<49:44,  1.80it/s, Mean Loss=0.116]

  2%|▏         | 119/5500 [01:20<1:02:40,  1.43it/s, Mean Loss=0.116]

  2%|▏         | 119/5500 [01:20<1:02:40,  1.43it/s, Mean Loss=0.254]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 180.39it/s]




  2%|▏         | 120/5500 [01:20<1:01:19,  1.46it/s, Mean Loss=0.254]

  2%|▏         | 120/5500 [01:20<1:01:19,  1.46it/s, Mean Loss=0.147]

  2%|▏         | 121/5500 [01:21<57:16,  1.57it/s, Mean Loss=0.147]  

  2%|▏         | 121/5500 [01:21<57:16,  1.57it/s, Mean Loss=0.201]

  2%|▏         | 122/5500 [01:21<54:57,  1.63it/s, Mean Loss=0.201]

  2%|▏         | 122/5500 [01:21<54:57,  1.63it/s, Mean Loss=0.131]

  2%|▏         | 123/5500 [01:22<53:35,  1.67it/s, Mean Loss=0.131]

  2%|▏         | 123/5500 [01:22<53:35,  1.67it/s, Mean Loss=0.184]

  2%|▏         | 124/5500 [01:22<52:41,  1.70it/s, Mean Loss=0.184]

  2%|▏         | 124/5500 [01:22<52:41,  1.70it/s, Mean Loss=0.213]

  2%|▏         | 125/5500 [01:23<51:39,  1.73it/s, Mean Loss=0.213]

  2%|▏         | 125/5500 [01:23<51:39,  1.73it/s, Mean Loss=0.118]

  2%|▏         | 126/5500 [01:24<51:18,  1.75it/s, Mean Loss=0.118]

  2%|▏         | 126/5500 [01:24<51:18,  1.75it/s, Mean Loss=0.11] 

  2%|▏         | 127/5500 [01:24<51:32,  1.74it/s, Mean Loss=0.11]

  2%|▏         | 127/5500 [01:24<51:32,  1.74it/s, Mean Loss=0.187]

  2%|▏         | 128/5500 [01:25<50:23,  1.78it/s, Mean Loss=0.187]

  2%|▏         | 128/5500 [01:25<50:23,  1.78it/s, Mean Loss=0.159]

  2%|▏         | 129/5500 [01:25<50:21,  1.78it/s, Mean Loss=0.159]

  2%|▏         | 129/5500 [01:25<50:21,  1.78it/s, Mean Loss=0.241]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 179.84it/s]




  2%|▏         | 130/5500 [01:26<52:23,  1.71it/s, Mean Loss=0.241]

  2%|▏         | 130/5500 [01:26<52:23,  1.71it/s, Mean Loss=0.149]

  2%|▏         | 131/5500 [01:26<50:58,  1.76it/s, Mean Loss=0.149]

  2%|▏         | 131/5500 [01:26<50:58,  1.76it/s, Mean Loss=0.146]

  2%|▏         | 132/5500 [01:27<50:26,  1.77it/s, Mean Loss=0.146]

  2%|▏         | 132/5500 [01:27<50:26,  1.77it/s, Mean Loss=0.252]

  2%|▏         | 133/5500 [01:27<48:23,  1.85it/s, Mean Loss=0.252]

  2%|▏         | 133/5500 [01:27<48:23,  1.85it/s, Mean Loss=0.0749]

  2%|▏         | 134/5500 [01:28<49:03,  1.82it/s, Mean Loss=0.0749]

  2%|▏         | 134/5500 [01:28<49:03,  1.82it/s, Mean Loss=0.166] 

  2%|▏         | 135/5500 [01:29<49:55,  1.79it/s, Mean Loss=0.166]

  2%|▏         | 135/5500 [01:29<49:55,  1.79it/s, Mean Loss=0.249]

  2%|▏         | 136/5500 [01:29<49:17,  1.81it/s, Mean Loss=0.249]

  2%|▏         | 136/5500 [01:29<49:17,  1.81it/s, Mean Loss=0.201]

  2%|▏         | 137/5500 [01:30<48:50,  1.83it/s, Mean Loss=0.201]

  2%|▏         | 137/5500 [01:30<48:50,  1.83it/s, Mean Loss=0.186]

  3%|▎         | 138/5500 [01:30<47:32,  1.88it/s, Mean Loss=0.186]

  3%|▎         | 138/5500 [01:30<47:32,  1.88it/s, Mean Loss=0.163]

  3%|▎         | 139/5500 [01:31<50:14,  1.78it/s, Mean Loss=0.163]

  3%|▎         | 139/5500 [01:31<50:14,  1.78it/s, Mean Loss=0.152]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 180.56it/s]




  3%|▎         | 140/5500 [01:31<51:58,  1.72it/s, Mean Loss=0.152]

  3%|▎         | 140/5500 [01:31<51:58,  1.72it/s, Mean Loss=0.205]

  3%|▎         | 141/5500 [01:32<51:41,  1.73it/s, Mean Loss=0.205]

  3%|▎         | 141/5500 [01:32<51:41,  1.73it/s, Mean Loss=0.057]

  3%|▎         | 142/5500 [01:33<50:45,  1.76it/s, Mean Loss=0.057]

  3%|▎         | 142/5500 [01:33<50:45,  1.76it/s, Mean Loss=0.154]

  3%|▎         | 143/5500 [01:33<50:15,  1.78it/s, Mean Loss=0.154]

  3%|▎         | 143/5500 [01:33<50:15,  1.78it/s, Mean Loss=0.163]

  3%|▎         | 144/5500 [01:34<49:29,  1.80it/s, Mean Loss=0.163]

  3%|▎         | 144/5500 [01:34<49:29,  1.80it/s, Mean Loss=0.215]

  3%|▎         | 145/5500 [01:34<48:31,  1.84it/s, Mean Loss=0.215]

  3%|▎         | 145/5500 [01:34<48:31,  1.84it/s, Mean Loss=0.231]

  3%|▎         | 146/5500 [01:35<48:17,  1.85it/s, Mean Loss=0.231]

  3%|▎         | 146/5500 [01:35<48:17,  1.85it/s, Mean Loss=0.0874]

  3%|▎         | 147/5500 [01:35<48:57,  1.82it/s, Mean Loss=0.0874]

  3%|▎         | 147/5500 [01:35<48:57,  1.82it/s, Mean Loss=0.169] 

  3%|▎         | 148/5500 [01:36<48:49,  1.83it/s, Mean Loss=0.169]

  3%|▎         | 148/5500 [01:36<48:49,  1.83it/s, Mean Loss=0.141]

  3%|▎         | 149/5500 [01:36<48:54,  1.82it/s, Mean Loss=0.141]

  3%|▎         | 149/5500 [01:36<48:54,  1.82it/s, Mean Loss=0.168]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A




Evaluation:  93%|█████████▎| 14/15 [00:00<00:00, 121.25it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 129.91it/s]




  3%|▎         | 150/5500 [01:37<53:07,  1.68it/s, Mean Loss=0.168]

  3%|▎         | 150/5500 [01:37<53:07,  1.68it/s, Mean Loss=0.199]

  3%|▎         | 151/5500 [01:38<50:35,  1.76it/s, Mean Loss=0.199]

  3%|▎         | 151/5500 [01:38<50:35,  1.76it/s, Mean Loss=0.217]

  3%|▎         | 152/5500 [01:39<1:03:19,  1.41it/s, Mean Loss=0.217]

  3%|▎         | 152/5500 [01:39<1:03:19,  1.41it/s, Mean Loss=0.0978]

  3%|▎         | 153/5500 [01:39<58:07,  1.53it/s, Mean Loss=0.0978]  

  3%|▎         | 153/5500 [01:39<58:07,  1.53it/s, Mean Loss=0.0625]

  3%|▎         | 154/5500 [01:40<55:30,  1.61it/s, Mean Loss=0.0625]

  3%|▎         | 154/5500 [01:40<55:30,  1.61it/s, Mean Loss=0.12]  

  3%|▎         | 155/5500 [01:40<54:26,  1.64it/s, Mean Loss=0.12]

  3%|▎         | 155/5500 [01:40<54:26,  1.64it/s, Mean Loss=0.164]

  3%|▎         | 156/5500 [01:41<54:31,  1.63it/s, Mean Loss=0.164]

  3%|▎         | 156/5500 [01:41<54:31,  1.63it/s, Mean Loss=0.148]

  3%|▎         | 157/5500 [01:41<54:35,  1.63it/s, Mean Loss=0.148]

  3%|▎         | 157/5500 [01:42<54:35,  1.63it/s, Mean Loss=0.126]

  3%|▎         | 158/5500 [01:42<53:47,  1.65it/s, Mean Loss=0.126]

  3%|▎         | 158/5500 [01:42<53:47,  1.65it/s, Mean Loss=0.119]

  3%|▎         | 159/5500 [01:43<52:49,  1.69it/s, Mean Loss=0.119]

  3%|▎         | 159/5500 [01:43<52:49,  1.69it/s, Mean Loss=0.132]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 150.17it/s]




  3%|▎         | 160/5500 [01:43<54:40,  1.63it/s, Mean Loss=0.132]

  3%|▎         | 160/5500 [01:43<54:40,  1.63it/s, Mean Loss=0.215]

  3%|▎         | 161/5500 [01:44<52:30,  1.69it/s, Mean Loss=0.215]

  3%|▎         | 161/5500 [01:44<52:30,  1.69it/s, Mean Loss=0.196]

  3%|▎         | 162/5500 [01:44<51:27,  1.73it/s, Mean Loss=0.196]

  3%|▎         | 162/5500 [01:44<51:27,  1.73it/s, Mean Loss=0.147]

  3%|▎         | 163/5500 [01:45<50:34,  1.76it/s, Mean Loss=0.147]

  3%|▎         | 163/5500 [01:45<50:34,  1.76it/s, Mean Loss=0.211]

  3%|▎         | 164/5500 [01:45<50:11,  1.77it/s, Mean Loss=0.211]

  3%|▎         | 164/5500 [01:46<50:11,  1.77it/s, Mean Loss=0.176]

  3%|▎         | 165/5500 [01:46<50:15,  1.77it/s, Mean Loss=0.176]

  3%|▎         | 165/5500 [01:46<50:15,  1.77it/s, Mean Loss=0.216]

  3%|▎         | 166/5500 [01:47<51:07,  1.74it/s, Mean Loss=0.216]

  3%|▎         | 166/5500 [01:47<51:07,  1.74it/s, Mean Loss=0.137]

  3%|▎         | 167/5500 [01:47<52:58,  1.68it/s, Mean Loss=0.137]

  3%|▎         | 167/5500 [01:47<52:58,  1.68it/s, Mean Loss=0.156]

  3%|▎         | 168/5500 [01:48<57:32,  1.54it/s, Mean Loss=0.156]

  3%|▎         | 168/5500 [01:48<57:32,  1.54it/s, Mean Loss=0.196]

  3%|▎         | 169/5500 [01:49<54:05,  1.64it/s, Mean Loss=0.196]

  3%|▎         | 169/5500 [01:49<54:05,  1.64it/s, Mean Loss=0.148]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 176.04it/s]




  3%|▎         | 170/5500 [01:49<54:17,  1.64it/s, Mean Loss=0.148]

  3%|▎         | 170/5500 [01:49<54:17,  1.64it/s, Mean Loss=0.17] 

  3%|▎         | 171/5500 [01:50<52:12,  1.70it/s, Mean Loss=0.17]

  3%|▎         | 171/5500 [01:50<52:12,  1.70it/s, Mean Loss=0.107]

  3%|▎         | 172/5500 [01:50<50:19,  1.76it/s, Mean Loss=0.107]

  3%|▎         | 172/5500 [01:50<50:19,  1.76it/s, Mean Loss=0.165]

  3%|▎         | 173/5500 [01:51<49:25,  1.80it/s, Mean Loss=0.165]

  3%|▎         | 173/5500 [01:51<49:25,  1.80it/s, Mean Loss=0.162]

  3%|▎         | 174/5500 [01:51<48:22,  1.84it/s, Mean Loss=0.162]

  3%|▎         | 174/5500 [01:51<48:22,  1.84it/s, Mean Loss=0.212]

  3%|▎         | 175/5500 [01:52<48:27,  1.83it/s, Mean Loss=0.212]

  3%|▎         | 175/5500 [01:52<48:27,  1.83it/s, Mean Loss=0.17] 

  3%|▎         | 176/5500 [01:52<48:32,  1.83it/s, Mean Loss=0.17]

  3%|▎         | 176/5500 [01:52<48:32,  1.83it/s, Mean Loss=0.15]

  3%|▎         | 177/5500 [01:53<48:33,  1.83it/s, Mean Loss=0.15]

  3%|▎         | 177/5500 [01:53<48:33,  1.83it/s, Mean Loss=0.141]

  3%|▎         | 178/5500 [01:53<47:47,  1.86it/s, Mean Loss=0.141]

  3%|▎         | 178/5500 [01:53<47:47,  1.86it/s, Mean Loss=0.177]

  3%|▎         | 179/5500 [01:54<48:02,  1.85it/s, Mean Loss=0.177]

  3%|▎         | 179/5500 [01:54<48:02,  1.85it/s, Mean Loss=0.2]  




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 174.36it/s]




  3%|▎         | 180/5500 [01:55<52:42,  1.68it/s, Mean Loss=0.2]

  3%|▎         | 180/5500 [01:55<52:42,  1.68it/s, Mean Loss=0.199]

  3%|▎         | 181/5500 [01:55<50:39,  1.75it/s, Mean Loss=0.199]

  3%|▎         | 181/5500 [01:55<50:39,  1.75it/s, Mean Loss=0.194]

  3%|▎         | 182/5500 [01:56<49:35,  1.79it/s, Mean Loss=0.194]

  3%|▎         | 182/5500 [01:56<49:35,  1.79it/s, Mean Loss=0.0835]

  3%|▎         | 183/5500 [01:56<48:51,  1.81it/s, Mean Loss=0.0835]

  3%|▎         | 183/5500 [01:56<48:51,  1.81it/s, Mean Loss=0.181] 

  3%|▎         | 184/5500 [01:57<48:21,  1.83it/s, Mean Loss=0.181]

  3%|▎         | 184/5500 [01:57<48:21,  1.83it/s, Mean Loss=0.181]

  3%|▎         | 185/5500 [01:57<48:01,  1.84it/s, Mean Loss=0.181]

  3%|▎         | 185/5500 [01:57<48:01,  1.84it/s, Mean Loss=0.193]

  3%|▎         | 186/5500 [01:58<47:46,  1.85it/s, Mean Loss=0.193]

  3%|▎         | 186/5500 [01:58<47:46,  1.85it/s, Mean Loss=0.215]

  3%|▎         | 187/5500 [01:58<48:00,  1.84it/s, Mean Loss=0.215]

  3%|▎         | 187/5500 [01:58<48:00,  1.84it/s, Mean Loss=0.164]

  3%|▎         | 188/5500 [01:59<48:45,  1.82it/s, Mean Loss=0.164]

  3%|▎         | 188/5500 [01:59<48:45,  1.82it/s, Mean Loss=0.127]

  3%|▎         | 189/5500 [02:00<49:57,  1.77it/s, Mean Loss=0.127]

  3%|▎         | 189/5500 [02:00<49:57,  1.77it/s, Mean Loss=0.122]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 177.02it/s]




  3%|▎         | 190/5500 [02:00<51:23,  1.72it/s, Mean Loss=0.122]

  3%|▎         | 190/5500 [02:00<51:23,  1.72it/s, Mean Loss=0.114]

  3%|▎         | 191/5500 [02:01<50:05,  1.77it/s, Mean Loss=0.114]

  3%|▎         | 191/5500 [02:01<50:05,  1.77it/s, Mean Loss=0.168]

  3%|▎         | 192/5500 [02:01<50:35,  1.75it/s, Mean Loss=0.168]

  3%|▎         | 192/5500 [02:01<50:35,  1.75it/s, Mean Loss=0.194]

  4%|▎         | 193/5500 [02:02<49:48,  1.78it/s, Mean Loss=0.194]

  4%|▎         | 193/5500 [02:02<49:48,  1.78it/s, Mean Loss=0.0792]

  4%|▎         | 194/5500 [02:03<1:05:24,  1.35it/s, Mean Loss=0.0792]

  4%|▎         | 194/5500 [02:03<1:05:24,  1.35it/s, Mean Loss=0.208] 

  4%|▎         | 195/5500 [02:04<1:00:56,  1.45it/s, Mean Loss=0.208]

  4%|▎         | 195/5500 [02:04<1:00:56,  1.45it/s, Mean Loss=0.185]

  4%|▎         | 196/5500 [02:04<56:47,  1.56it/s, Mean Loss=0.185]  

  4%|▎         | 196/5500 [02:04<56:47,  1.56it/s, Mean Loss=0.136]

  4%|▎         | 197/5500 [02:05<53:16,  1.66it/s, Mean Loss=0.136]

  4%|▎         | 197/5500 [02:05<53:16,  1.66it/s, Mean Loss=0.223]

  4%|▎         | 198/5500 [02:05<51:59,  1.70it/s, Mean Loss=0.223]

  4%|▎         | 198/5500 [02:05<51:59,  1.70it/s, Mean Loss=0.145]

  4%|▎         | 199/5500 [02:06<50:06,  1.76it/s, Mean Loss=0.145]

  4%|▎         | 199/5500 [02:06<50:06,  1.76it/s, Mean Loss=0.113]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 191.99it/s]




  4%|▎         | 200/5500 [02:06<51:16,  1.72it/s, Mean Loss=0.113]

  4%|▎         | 200/5500 [02:06<51:16,  1.72it/s, Mean Loss=0.112]

  4%|▎         | 201/5500 [02:07<48:47,  1.81it/s, Mean Loss=0.112]

  4%|▎         | 201/5500 [02:07<48:47,  1.81it/s, Mean Loss=0.131]

  4%|▎         | 202/5500 [02:07<47:29,  1.86it/s, Mean Loss=0.131]

  4%|▎         | 202/5500 [02:07<47:29,  1.86it/s, Mean Loss=0.182]

  4%|▎         | 203/5500 [02:08<45:52,  1.92it/s, Mean Loss=0.182]

  4%|▎         | 203/5500 [02:08<45:52,  1.92it/s, Mean Loss=0.0933]

  4%|▎         | 204/5500 [02:08<45:24,  1.94it/s, Mean Loss=0.0933]

  4%|▎         | 204/5500 [02:08<45:24,  1.94it/s, Mean Loss=0.0973]

  4%|▎         | 205/5500 [02:09<45:26,  1.94it/s, Mean Loss=0.0973]

  4%|▎         | 205/5500 [02:09<45:26,  1.94it/s, Mean Loss=0.22]  

  4%|▎         | 206/5500 [02:09<44:55,  1.96it/s, Mean Loss=0.22]

  4%|▎         | 206/5500 [02:09<44:55,  1.96it/s, Mean Loss=0.0885]

  4%|▍         | 207/5500 [02:10<44:18,  1.99it/s, Mean Loss=0.0885]

  4%|▍         | 207/5500 [02:10<44:18,  1.99it/s, Mean Loss=0.163] 

  4%|▍         | 208/5500 [02:10<44:08,  2.00it/s, Mean Loss=0.163]

  4%|▍         | 208/5500 [02:10<44:08,  2.00it/s, Mean Loss=0.206]

  4%|▍         | 209/5500 [02:11<44:08,  2.00it/s, Mean Loss=0.206]

  4%|▍         | 209/5500 [02:11<44:08,  2.00it/s, Mean Loss=0.114]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 171.94it/s]




  4%|▍         | 210/5500 [02:11<46:03,  1.91it/s, Mean Loss=0.114]

  4%|▍         | 210/5500 [02:11<46:03,  1.91it/s, Mean Loss=0.0782]

  4%|▍         | 211/5500 [02:12<45:15,  1.95it/s, Mean Loss=0.0782]

  4%|▍         | 211/5500 [02:12<45:15,  1.95it/s, Mean Loss=0.135] 

  4%|▍         | 212/5500 [02:12<45:51,  1.92it/s, Mean Loss=0.135]

  4%|▍         | 212/5500 [02:12<45:51,  1.92it/s, Mean Loss=0.15] 

  4%|▍         | 213/5500 [02:13<45:06,  1.95it/s, Mean Loss=0.15]

  4%|▍         | 213/5500 [02:13<45:06,  1.95it/s, Mean Loss=0.131]

  4%|▍         | 214/5500 [02:13<44:05,  2.00it/s, Mean Loss=0.131]

  4%|▍         | 214/5500 [02:13<44:05,  2.00it/s, Mean Loss=0.168]

  4%|▍         | 215/5500 [02:14<42:45,  2.06it/s, Mean Loss=0.168]

  4%|▍         | 215/5500 [02:14<42:45,  2.06it/s, Mean Loss=0.199]

  4%|▍         | 216/5500 [02:14<45:15,  1.95it/s, Mean Loss=0.199]

  4%|▍         | 216/5500 [02:14<45:15,  1.95it/s, Mean Loss=0.139]

  4%|▍         | 217/5500 [02:15<44:04,  2.00it/s, Mean Loss=0.139]

  4%|▍         | 217/5500 [02:15<44:04,  2.00it/s, Mean Loss=0.238]

  4%|▍         | 218/5500 [02:15<43:35,  2.02it/s, Mean Loss=0.238]

  4%|▍         | 218/5500 [02:15<43:35,  2.02it/s, Mean Loss=0.0917]

  4%|▍         | 219/5500 [02:16<42:49,  2.05it/s, Mean Loss=0.0917]

  4%|▍         | 219/5500 [02:16<42:49,  2.05it/s, Mean Loss=0.106] 




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 179.95it/s]




  4%|▍         | 220/5500 [02:16<45:21,  1.94it/s, Mean Loss=0.106]

  4%|▍         | 220/5500 [02:16<45:21,  1.94it/s, Mean Loss=0.155]

  4%|▍         | 221/5500 [02:17<44:54,  1.96it/s, Mean Loss=0.155]

  4%|▍         | 221/5500 [02:17<44:54,  1.96it/s, Mean Loss=0.189]

  4%|▍         | 222/5500 [02:17<44:21,  1.98it/s, Mean Loss=0.189]

  4%|▍         | 222/5500 [02:17<44:21,  1.98it/s, Mean Loss=0.154]

  4%|▍         | 223/5500 [02:18<44:32,  1.97it/s, Mean Loss=0.154]

  4%|▍         | 223/5500 [02:18<44:32,  1.97it/s, Mean Loss=0.133]

  4%|▍         | 224/5500 [02:18<44:10,  1.99it/s, Mean Loss=0.133]

  4%|▍         | 224/5500 [02:18<44:10,  1.99it/s, Mean Loss=0.137]

  4%|▍         | 225/5500 [02:19<43:24,  2.03it/s, Mean Loss=0.137]

  4%|▍         | 225/5500 [02:19<43:24,  2.03it/s, Mean Loss=0.156]

  4%|▍         | 226/5500 [02:19<43:33,  2.02it/s, Mean Loss=0.156]

  4%|▍         | 226/5500 [02:19<43:33,  2.02it/s, Mean Loss=0.118]

  4%|▍         | 227/5500 [02:20<42:50,  2.05it/s, Mean Loss=0.118]

  4%|▍         | 227/5500 [02:20<42:50,  2.05it/s, Mean Loss=0.184]

  4%|▍         | 228/5500 [02:20<42:13,  2.08it/s, Mean Loss=0.184]

  4%|▍         | 228/5500 [02:20<42:13,  2.08it/s, Mean Loss=0.0434]

  4%|▍         | 229/5500 [02:21<42:43,  2.06it/s, Mean Loss=0.0434]

  4%|▍         | 229/5500 [02:21<42:43,  2.06it/s, Mean Loss=0.102] 




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 174.13it/s]




  4%|▍         | 230/5500 [02:21<45:15,  1.94it/s, Mean Loss=0.102]

  4%|▍         | 230/5500 [02:21<45:15,  1.94it/s, Mean Loss=0.211]

  4%|▍         | 231/5500 [02:22<44:50,  1.96it/s, Mean Loss=0.211]

  4%|▍         | 231/5500 [02:22<44:50,  1.96it/s, Mean Loss=0.122]

  4%|▍         | 232/5500 [02:22<44:06,  1.99it/s, Mean Loss=0.122]

  4%|▍         | 232/5500 [02:22<44:06,  1.99it/s, Mean Loss=0.181]

  4%|▍         | 233/5500 [02:23<44:28,  1.97it/s, Mean Loss=0.181]

  4%|▍         | 233/5500 [02:23<44:28,  1.97it/s, Mean Loss=0.117]

  4%|▍         | 234/5500 [02:23<43:49,  2.00it/s, Mean Loss=0.117]

  4%|▍         | 234/5500 [02:23<43:49,  2.00it/s, Mean Loss=0.0663]

  4%|▍         | 235/5500 [02:24<43:51,  2.00it/s, Mean Loss=0.0663]

  4%|▍         | 235/5500 [02:24<43:51,  2.00it/s, Mean Loss=0.186] 

  4%|▍         | 236/5500 [02:24<43:01,  2.04it/s, Mean Loss=0.186]

  4%|▍         | 236/5500 [02:24<43:01,  2.04it/s, Mean Loss=0.173]

  4%|▍         | 237/5500 [02:25<42:19,  2.07it/s, Mean Loss=0.173]

  4%|▍         | 237/5500 [02:25<42:19,  2.07it/s, Mean Loss=0.148]

  4%|▍         | 238/5500 [02:25<42:48,  2.05it/s, Mean Loss=0.148]

  4%|▍         | 238/5500 [02:25<42:48,  2.05it/s, Mean Loss=0.129]

  4%|▍         | 239/5500 [02:26<43:30,  2.02it/s, Mean Loss=0.129]

  4%|▍         | 239/5500 [02:26<43:30,  2.02it/s, Mean Loss=0.133]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 180.34it/s]




  4%|▍         | 240/5500 [02:26<47:06,  1.86it/s, Mean Loss=0.133]

  4%|▍         | 240/5500 [02:26<47:06,  1.86it/s, Mean Loss=0.181]

  4%|▍         | 241/5500 [02:27<46:32,  1.88it/s, Mean Loss=0.181]

  4%|▍         | 241/5500 [02:27<46:32,  1.88it/s, Mean Loss=0.119]

  4%|▍         | 242/5500 [02:28<46:09,  1.90it/s, Mean Loss=0.119]

  4%|▍         | 242/5500 [02:28<46:09,  1.90it/s, Mean Loss=0.122]

  4%|▍         | 243/5500 [02:28<45:52,  1.91it/s, Mean Loss=0.122]

  4%|▍         | 243/5500 [02:28<45:52,  1.91it/s, Mean Loss=0.139]

  4%|▍         | 244/5500 [02:29<45:14,  1.94it/s, Mean Loss=0.139]

  4%|▍         | 244/5500 [02:29<45:14,  1.94it/s, Mean Loss=0.158]

  4%|▍         | 245/5500 [02:29<44:21,  1.97it/s, Mean Loss=0.158]

  4%|▍         | 245/5500 [02:29<44:21,  1.97it/s, Mean Loss=0.207]

  4%|▍         | 246/5500 [02:30<44:37,  1.96it/s, Mean Loss=0.207]

  4%|▍         | 246/5500 [02:30<44:37,  1.96it/s, Mean Loss=0.197]

  4%|▍         | 247/5500 [02:30<44:51,  1.95it/s, Mean Loss=0.197]

  4%|▍         | 247/5500 [02:30<44:51,  1.95it/s, Mean Loss=0.188]

  5%|▍         | 248/5500 [02:31<45:19,  1.93it/s, Mean Loss=0.188]

  5%|▍         | 248/5500 [02:31<45:19,  1.93it/s, Mean Loss=0.158]

  5%|▍         | 249/5500 [02:31<45:17,  1.93it/s, Mean Loss=0.158]

  5%|▍         | 249/5500 [02:31<45:17,  1.93it/s, Mean Loss=0.149]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 179.44it/s]




  5%|▍         | 250/5500 [02:32<1:08:25,  1.28it/s, Mean Loss=0.149]

  5%|▍         | 250/5500 [02:33<1:08:25,  1.28it/s, Mean Loss=0.138]

  5%|▍         | 251/5500 [02:33<1:03:10,  1.38it/s, Mean Loss=0.138]

  5%|▍         | 251/5500 [02:33<1:03:10,  1.38it/s, Mean Loss=0.231]

  5%|▍         | 252/5500 [02:34<56:54,  1.54it/s, Mean Loss=0.231]  

  5%|▍         | 252/5500 [02:34<56:54,  1.54it/s, Mean Loss=0.118]

  5%|▍         | 253/5500 [02:34<52:56,  1.65it/s, Mean Loss=0.118]

  5%|▍         | 253/5500 [02:34<52:56,  1.65it/s, Mean Loss=0.299]

  5%|▍         | 254/5500 [02:35<51:02,  1.71it/s, Mean Loss=0.299]

  5%|▍         | 254/5500 [02:35<51:02,  1.71it/s, Mean Loss=0.174]

  5%|▍         | 255/5500 [02:35<49:42,  1.76it/s, Mean Loss=0.174]

  5%|▍         | 255/5500 [02:35<49:42,  1.76it/s, Mean Loss=0.229]

  5%|▍         | 256/5500 [02:36<53:34,  1.63it/s, Mean Loss=0.229]

  5%|▍         | 256/5500 [02:36<53:34,  1.63it/s, Mean Loss=0.159]

  5%|▍         | 257/5500 [02:36<52:20,  1.67it/s, Mean Loss=0.159]

  5%|▍         | 257/5500 [02:36<52:20,  1.67it/s, Mean Loss=0.161]

  5%|▍         | 258/5500 [02:37<51:28,  1.70it/s, Mean Loss=0.161]

  5%|▍         | 258/5500 [02:37<51:28,  1.70it/s, Mean Loss=0.148]

  5%|▍         | 259/5500 [02:38<51:44,  1.69it/s, Mean Loss=0.148]

  5%|▍         | 259/5500 [02:38<51:44,  1.69it/s, Mean Loss=0.122]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 180.31it/s]




  5%|▍         | 260/5500 [02:38<55:52,  1.56it/s, Mean Loss=0.122]

  5%|▍         | 260/5500 [02:38<55:52,  1.56it/s, Mean Loss=0.143]

  5%|▍         | 261/5500 [02:39<52:47,  1.65it/s, Mean Loss=0.143]

  5%|▍         | 261/5500 [02:39<52:47,  1.65it/s, Mean Loss=0.206]

  5%|▍         | 262/5500 [02:39<51:16,  1.70it/s, Mean Loss=0.206]

  5%|▍         | 262/5500 [02:39<51:16,  1.70it/s, Mean Loss=0.204]

  5%|▍         | 263/5500 [02:40<50:06,  1.74it/s, Mean Loss=0.204]

  5%|▍         | 263/5500 [02:40<50:06,  1.74it/s, Mean Loss=0.266]

  5%|▍         | 264/5500 [02:41<50:24,  1.73it/s, Mean Loss=0.266]

  5%|▍         | 264/5500 [02:41<50:24,  1.73it/s, Mean Loss=0.281]

  5%|▍         | 265/5500 [02:41<49:40,  1.76it/s, Mean Loss=0.281]

  5%|▍         | 265/5500 [02:41<49:40,  1.76it/s, Mean Loss=0.14] 

  5%|▍         | 266/5500 [02:42<49:33,  1.76it/s, Mean Loss=0.14]

  5%|▍         | 266/5500 [02:42<49:33,  1.76it/s, Mean Loss=0.0697]

  5%|▍         | 267/5500 [02:42<49:35,  1.76it/s, Mean Loss=0.0697]

  5%|▍         | 267/5500 [02:42<49:35,  1.76it/s, Mean Loss=0.0613]

  5%|▍         | 268/5500 [02:43<49:04,  1.78it/s, Mean Loss=0.0613]

  5%|▍         | 268/5500 [02:43<49:04,  1.78it/s, Mean Loss=0.152] 

  5%|▍         | 269/5500 [02:43<50:54,  1.71it/s, Mean Loss=0.152]

  5%|▍         | 269/5500 [02:43<50:54,  1.71it/s, Mean Loss=0.365]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 171.31it/s]




  5%|▍         | 270/5500 [02:44<51:44,  1.68it/s, Mean Loss=0.365]

  5%|▍         | 270/5500 [02:44<51:44,  1.68it/s, Mean Loss=0.102]

  5%|▍         | 271/5500 [02:45<49:40,  1.75it/s, Mean Loss=0.102]

  5%|▍         | 271/5500 [02:45<49:40,  1.75it/s, Mean Loss=0.209]

  5%|▍         | 272/5500 [02:45<49:08,  1.77it/s, Mean Loss=0.209]

  5%|▍         | 272/5500 [02:45<49:08,  1.77it/s, Mean Loss=0.16] 

  5%|▍         | 273/5500 [02:46<49:22,  1.76it/s, Mean Loss=0.16]

  5%|▍         | 273/5500 [02:46<49:22,  1.76it/s, Mean Loss=0.17]

  5%|▍         | 274/5500 [02:46<50:35,  1.72it/s, Mean Loss=0.17]

  5%|▍         | 274/5500 [02:46<50:35,  1.72it/s, Mean Loss=0.168]

  5%|▌         | 275/5500 [02:47<51:16,  1.70it/s, Mean Loss=0.168]

  5%|▌         | 275/5500 [02:47<51:16,  1.70it/s, Mean Loss=0.0704]

  5%|▌         | 276/5500 [02:47<50:50,  1.71it/s, Mean Loss=0.0704]

  5%|▌         | 276/5500 [02:47<50:50,  1.71it/s, Mean Loss=0.229] 

  5%|▌         | 277/5500 [02:48<49:25,  1.76it/s, Mean Loss=0.229]

  5%|▌         | 277/5500 [02:48<49:25,  1.76it/s, Mean Loss=0.0791]

  5%|▌         | 278/5500 [02:49<48:37,  1.79it/s, Mean Loss=0.0791]

  5%|▌         | 278/5500 [02:49<48:37,  1.79it/s, Mean Loss=0.171] 

  5%|▌         | 279/5500 [02:49<48:45,  1.78it/s, Mean Loss=0.171]

  5%|▌         | 279/5500 [02:49<48:45,  1.78it/s, Mean Loss=0.165]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 180.39it/s]




  5%|▌         | 280/5500 [02:50<52:06,  1.67it/s, Mean Loss=0.165]

  5%|▌         | 280/5500 [02:50<52:06,  1.67it/s, Mean Loss=0.172]

  5%|▌         | 281/5500 [02:50<50:13,  1.73it/s, Mean Loss=0.172]

  5%|▌         | 281/5500 [02:50<50:13,  1.73it/s, Mean Loss=0.19] 

  5%|▌         | 282/5500 [02:51<57:00,  1.53it/s, Mean Loss=0.19]

  5%|▌         | 282/5500 [02:51<57:00,  1.53it/s, Mean Loss=0.233]

  5%|▌         | 283/5500 [02:52<1:01:30,  1.41it/s, Mean Loss=0.233]

  5%|▌         | 283/5500 [02:52<1:01:30,  1.41it/s, Mean Loss=0.129]

  5%|▌         | 284/5500 [02:53<59:05,  1.47it/s, Mean Loss=0.129]  

  5%|▌         | 284/5500 [02:53<59:05,  1.47it/s, Mean Loss=0.222]

  5%|▌         | 285/5500 [02:53<57:53,  1.50it/s, Mean Loss=0.222]

  5%|▌         | 285/5500 [02:53<57:53,  1.50it/s, Mean Loss=0.254]

  5%|▌         | 286/5500 [02:54<55:15,  1.57it/s, Mean Loss=0.254]

  5%|▌         | 286/5500 [02:54<55:15,  1.57it/s, Mean Loss=0.132]

  5%|▌         | 287/5500 [02:54<52:38,  1.65it/s, Mean Loss=0.132]

  5%|▌         | 287/5500 [02:54<52:38,  1.65it/s, Mean Loss=0.139]

  5%|▌         | 288/5500 [02:55<51:12,  1.70it/s, Mean Loss=0.139]

  5%|▌         | 288/5500 [02:55<51:12,  1.70it/s, Mean Loss=0.14] 

  5%|▌         | 289/5500 [02:55<50:26,  1.72it/s, Mean Loss=0.14]

  5%|▌         | 289/5500 [02:55<50:26,  1.72it/s, Mean Loss=0.16]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 176.98it/s]




  5%|▌         | 290/5500 [02:56<52:00,  1.67it/s, Mean Loss=0.16]

  5%|▌         | 290/5500 [02:56<52:00,  1.67it/s, Mean Loss=0.184]

  5%|▌         | 291/5500 [02:57<50:33,  1.72it/s, Mean Loss=0.184]

  5%|▌         | 291/5500 [02:57<50:33,  1.72it/s, Mean Loss=0.167]

  5%|▌         | 292/5500 [02:57<49:50,  1.74it/s, Mean Loss=0.167]

  5%|▌         | 292/5500 [02:57<49:50,  1.74it/s, Mean Loss=0.148]

  5%|▌         | 293/5500 [02:58<50:32,  1.72it/s, Mean Loss=0.148]

  5%|▌         | 293/5500 [02:58<50:32,  1.72it/s, Mean Loss=0.171]

  5%|▌         | 294/5500 [02:58<49:56,  1.74it/s, Mean Loss=0.171]

  5%|▌         | 294/5500 [02:58<49:56,  1.74it/s, Mean Loss=0.0973]

  5%|▌         | 295/5500 [02:59<48:49,  1.78it/s, Mean Loss=0.0973]

  5%|▌         | 295/5500 [02:59<48:49,  1.78it/s, Mean Loss=0.178] 

  5%|▌         | 296/5500 [02:59<48:25,  1.79it/s, Mean Loss=0.178]

  5%|▌         | 296/5500 [02:59<48:25,  1.79it/s, Mean Loss=0.288]

  5%|▌         | 297/5500 [03:00<47:49,  1.81it/s, Mean Loss=0.288]

  5%|▌         | 297/5500 [03:00<47:49,  1.81it/s, Mean Loss=0.207]

  5%|▌         | 298/5500 [03:00<47:39,  1.82it/s, Mean Loss=0.207]

  5%|▌         | 298/5500 [03:00<47:39,  1.82it/s, Mean Loss=0.157]

  5%|▌         | 299/5500 [03:01<46:51,  1.85it/s, Mean Loss=0.157]

  5%|▌         | 299/5500 [03:01<46:51,  1.85it/s, Mean Loss=0.115]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 212.88it/s]




  5%|▌         | 300/5500 [03:02<49:42,  1.74it/s, Mean Loss=0.115]

  5%|▌         | 300/5500 [03:02<49:42,  1.74it/s, Mean Loss=0.217]

  5%|▌         | 301/5500 [03:02<48:39,  1.78it/s, Mean Loss=0.217]

  5%|▌         | 301/5500 [03:02<48:39,  1.78it/s, Mean Loss=0.179]

  5%|▌         | 302/5500 [03:03<47:54,  1.81it/s, Mean Loss=0.179]

  5%|▌         | 302/5500 [03:03<47:54,  1.81it/s, Mean Loss=0.136]

  6%|▌         | 303/5500 [03:03<47:12,  1.83it/s, Mean Loss=0.136]

  6%|▌         | 303/5500 [03:03<47:12,  1.83it/s, Mean Loss=0.145]

  6%|▌         | 304/5500 [03:04<46:06,  1.88it/s, Mean Loss=0.145]

  6%|▌         | 304/5500 [03:04<46:06,  1.88it/s, Mean Loss=0.158]

  6%|▌         | 305/5500 [03:04<46:07,  1.88it/s, Mean Loss=0.158]

  6%|▌         | 305/5500 [03:04<46:07,  1.88it/s, Mean Loss=0.179]

  6%|▌         | 306/5500 [03:05<45:20,  1.91it/s, Mean Loss=0.179]

  6%|▌         | 306/5500 [03:05<45:20,  1.91it/s, Mean Loss=0.126]

  6%|▌         | 307/5500 [03:05<45:13,  1.91it/s, Mean Loss=0.126]

  6%|▌         | 307/5500 [03:05<45:13,  1.91it/s, Mean Loss=0.0937]

  6%|▌         | 308/5500 [03:06<45:52,  1.89it/s, Mean Loss=0.0937]

  6%|▌         | 308/5500 [03:06<45:52,  1.89it/s, Mean Loss=0.113] 

  6%|▌         | 309/5500 [03:06<46:42,  1.85it/s, Mean Loss=0.113]

  6%|▌         | 309/5500 [03:06<46:42,  1.85it/s, Mean Loss=0.0854]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 239.96it/s]




  6%|▌         | 310/5500 [03:07<53:09,  1.63it/s, Mean Loss=0.0854]

  6%|▌         | 310/5500 [03:07<53:09,  1.63it/s, Mean Loss=0.206] 

  6%|▌         | 311/5500 [03:08<52:21,  1.65it/s, Mean Loss=0.206]

  6%|▌         | 311/5500 [03:08<52:21,  1.65it/s, Mean Loss=0.217]

  6%|▌         | 312/5500 [03:08<52:59,  1.63it/s, Mean Loss=0.217]

  6%|▌         | 312/5500 [03:08<52:59,  1.63it/s, Mean Loss=0.149]

  6%|▌         | 313/5500 [03:09<50:59,  1.70it/s, Mean Loss=0.149]

  6%|▌         | 313/5500 [03:09<50:59,  1.70it/s, Mean Loss=0.148]

  6%|▌         | 314/5500 [03:10<50:16,  1.72it/s, Mean Loss=0.148]

  6%|▌         | 314/5500 [03:10<50:16,  1.72it/s, Mean Loss=0.199]

  6%|▌         | 315/5500 [03:11<1:22:16,  1.05it/s, Mean Loss=0.199]

  6%|▌         | 315/5500 [03:11<1:22:16,  1.05it/s, Mean Loss=0.155]

  6%|▌         | 316/5500 [03:12<1:11:53,  1.20it/s, Mean Loss=0.155]

  6%|▌         | 316/5500 [03:12<1:11:53,  1.20it/s, Mean Loss=0.083]

  6%|▌         | 317/5500 [03:12<1:04:56,  1.33it/s, Mean Loss=0.083]

  6%|▌         | 317/5500 [03:12<1:04:56,  1.33it/s, Mean Loss=0.171]

  6%|▌         | 318/5500 [03:13<1:00:15,  1.43it/s, Mean Loss=0.171]

  6%|▌         | 318/5500 [03:13<1:00:15,  1.43it/s, Mean Loss=0.0769]

  6%|▌         | 319/5500 [03:14<57:14,  1.51it/s, Mean Loss=0.0769]  

  6%|▌         | 319/5500 [03:14<57:14,  1.51it/s, Mean Loss=0.152] 




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 187.73it/s]




  6%|▌         | 320/5500 [03:14<58:06,  1.49it/s, Mean Loss=0.152]

  6%|▌         | 320/5500 [03:14<58:06,  1.49it/s, Mean Loss=0.115]

  6%|▌         | 321/5500 [03:15<54:53,  1.57it/s, Mean Loss=0.115]

  6%|▌         | 321/5500 [03:15<54:53,  1.57it/s, Mean Loss=0.186]

  6%|▌         | 322/5500 [03:15<54:48,  1.57it/s, Mean Loss=0.186]

  6%|▌         | 322/5500 [03:15<54:48,  1.57it/s, Mean Loss=0.127]

  6%|▌         | 323/5500 [03:16<53:01,  1.63it/s, Mean Loss=0.127]

  6%|▌         | 323/5500 [03:16<53:01,  1.63it/s, Mean Loss=0.179]

  6%|▌         | 324/5500 [03:17<51:19,  1.68it/s, Mean Loss=0.179]

  6%|▌         | 324/5500 [03:17<51:19,  1.68it/s, Mean Loss=0.114]

  6%|▌         | 325/5500 [03:17<50:09,  1.72it/s, Mean Loss=0.114]

  6%|▌         | 325/5500 [03:17<50:09,  1.72it/s, Mean Loss=0.188]

  6%|▌         | 326/5500 [03:18<49:49,  1.73it/s, Mean Loss=0.188]

  6%|▌         | 326/5500 [03:18<49:49,  1.73it/s, Mean Loss=0.182]

  6%|▌         | 327/5500 [03:18<49:27,  1.74it/s, Mean Loss=0.182]

  6%|▌         | 327/5500 [03:18<49:27,  1.74it/s, Mean Loss=0.128]

  6%|▌         | 328/5500 [03:19<49:15,  1.75it/s, Mean Loss=0.128]

  6%|▌         | 328/5500 [03:19<49:15,  1.75it/s, Mean Loss=0.18] 

  6%|▌         | 329/5500 [03:19<49:35,  1.74it/s, Mean Loss=0.18]

  6%|▌         | 329/5500 [03:19<49:35,  1.74it/s, Mean Loss=0.178]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 186.50it/s]




  6%|▌         | 330/5500 [03:20<50:45,  1.70it/s, Mean Loss=0.178]

  6%|▌         | 330/5500 [03:20<50:45,  1.70it/s, Mean Loss=0.174]

  6%|▌         | 331/5500 [03:21<50:04,  1.72it/s, Mean Loss=0.174]

  6%|▌         | 331/5500 [03:21<50:04,  1.72it/s, Mean Loss=0.157]

  6%|▌         | 332/5500 [03:21<48:45,  1.77it/s, Mean Loss=0.157]

  6%|▌         | 332/5500 [03:21<48:45,  1.77it/s, Mean Loss=0.132]

  6%|▌         | 333/5500 [03:22<48:45,  1.77it/s, Mean Loss=0.132]

  6%|▌         | 333/5500 [03:22<48:45,  1.77it/s, Mean Loss=0.0791]

  6%|▌         | 334/5500 [03:22<49:36,  1.74it/s, Mean Loss=0.0791]

  6%|▌         | 334/5500 [03:22<49:36,  1.74it/s, Mean Loss=0.139] 

  6%|▌         | 335/5500 [03:23<51:03,  1.69it/s, Mean Loss=0.139]

  6%|▌         | 335/5500 [03:23<51:03,  1.69it/s, Mean Loss=0.213]

  6%|▌         | 336/5500 [03:24<50:27,  1.71it/s, Mean Loss=0.213]

  6%|▌         | 336/5500 [03:24<50:27,  1.71it/s, Mean Loss=0.21] 

  6%|▌         | 337/5500 [03:24<49:03,  1.75it/s, Mean Loss=0.21]

  6%|▌         | 337/5500 [03:24<49:03,  1.75it/s, Mean Loss=0.159]

  6%|▌         | 338/5500 [03:25<50:11,  1.71it/s, Mean Loss=0.159]

  6%|▌         | 338/5500 [03:25<50:11,  1.71it/s, Mean Loss=0.127]

  6%|▌         | 339/5500 [03:25<48:52,  1.76it/s, Mean Loss=0.127]

  6%|▌         | 339/5500 [03:25<48:52,  1.76it/s, Mean Loss=0.0985]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 173.42it/s]




  6%|▌         | 340/5500 [03:26<51:26,  1.67it/s, Mean Loss=0.0985]

  6%|▌         | 340/5500 [03:26<51:26,  1.67it/s, Mean Loss=0.148] 

  6%|▌         | 341/5500 [03:26<50:33,  1.70it/s, Mean Loss=0.148]

  6%|▌         | 341/5500 [03:26<50:33,  1.70it/s, Mean Loss=0.19] 

  6%|▌         | 342/5500 [03:27<49:36,  1.73it/s, Mean Loss=0.19]

  6%|▌         | 342/5500 [03:27<49:36,  1.73it/s, Mean Loss=0.145]

  6%|▌         | 343/5500 [03:28<49:14,  1.75it/s, Mean Loss=0.145]

  6%|▌         | 343/5500 [03:28<49:14,  1.75it/s, Mean Loss=0.183]

  6%|▋         | 344/5500 [03:28<48:10,  1.78it/s, Mean Loss=0.183]

  6%|▋         | 344/5500 [03:28<48:10,  1.78it/s, Mean Loss=0.18] 

  6%|▋         | 345/5500 [03:29<47:28,  1.81it/s, Mean Loss=0.18]

  6%|▋         | 345/5500 [03:29<47:28,  1.81it/s, Mean Loss=0.194]

  6%|▋         | 346/5500 [03:29<46:31,  1.85it/s, Mean Loss=0.194]

  6%|▋         | 346/5500 [03:29<46:31,  1.85it/s, Mean Loss=0.212]

  6%|▋         | 347/5500 [03:30<46:42,  1.84it/s, Mean Loss=0.212]

  6%|▋         | 347/5500 [03:30<46:42,  1.84it/s, Mean Loss=0.162]

  6%|▋         | 348/5500 [03:30<46:28,  1.85it/s, Mean Loss=0.162]

  6%|▋         | 348/5500 [03:30<46:28,  1.85it/s, Mean Loss=0.228]

  6%|▋         | 349/5500 [03:31<45:49,  1.87it/s, Mean Loss=0.228]

  6%|▋         | 349/5500 [03:31<45:49,  1.87it/s, Mean Loss=0.0753]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 175.43it/s]




  6%|▋         | 350/5500 [03:31<48:51,  1.76it/s, Mean Loss=0.0753]

  6%|▋         | 350/5500 [03:31<48:51,  1.76it/s, Mean Loss=0.12]  

  6%|▋         | 351/5500 [03:32<48:46,  1.76it/s, Mean Loss=0.12]

  6%|▋         | 351/5500 [03:32<48:46,  1.76it/s, Mean Loss=0.209]

  6%|▋         | 352/5500 [03:32<48:20,  1.77it/s, Mean Loss=0.209]

  6%|▋         | 352/5500 [03:33<48:20,  1.77it/s, Mean Loss=0.162]

  6%|▋         | 353/5500 [03:33<48:47,  1.76it/s, Mean Loss=0.162]

  6%|▋         | 353/5500 [03:33<48:47,  1.76it/s, Mean Loss=0.154]

  6%|▋         | 354/5500 [03:34<48:17,  1.78it/s, Mean Loss=0.154]

  6%|▋         | 354/5500 [03:34<48:17,  1.78it/s, Mean Loss=0.173]

  6%|▋         | 355/5500 [03:34<47:28,  1.81it/s, Mean Loss=0.173]

  6%|▋         | 355/5500 [03:34<47:28,  1.81it/s, Mean Loss=0.128]

  6%|▋         | 356/5500 [03:35<52:28,  1.63it/s, Mean Loss=0.128]

  6%|▋         | 356/5500 [03:35<52:28,  1.63it/s, Mean Loss=0.106]

  6%|▋         | 357/5500 [03:35<50:11,  1.71it/s, Mean Loss=0.106]

  6%|▋         | 357/5500 [03:35<50:11,  1.71it/s, Mean Loss=0.146]

  7%|▋         | 358/5500 [03:36<49:12,  1.74it/s, Mean Loss=0.146]

  7%|▋         | 358/5500 [03:36<49:12,  1.74it/s, Mean Loss=0.0977]

  7%|▋         | 359/5500 [03:37<48:30,  1.77it/s, Mean Loss=0.0977]

  7%|▋         | 359/5500 [03:37<48:30,  1.77it/s, Mean Loss=0.191] 




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 188.08it/s]




  7%|▋         | 360/5500 [03:37<49:42,  1.72it/s, Mean Loss=0.191]

  7%|▋         | 360/5500 [03:37<49:42,  1.72it/s, Mean Loss=0.178]

  7%|▋         | 361/5500 [03:38<48:05,  1.78it/s, Mean Loss=0.178]

  7%|▋         | 361/5500 [03:38<48:05,  1.78it/s, Mean Loss=0.132]

  7%|▋         | 362/5500 [03:38<46:43,  1.83it/s, Mean Loss=0.132]

  7%|▋         | 362/5500 [03:38<46:43,  1.83it/s, Mean Loss=0.235]

  7%|▋         | 363/5500 [03:39<44:30,  1.92it/s, Mean Loss=0.235]

  7%|▋         | 363/5500 [03:39<44:30,  1.92it/s, Mean Loss=0.133]

  7%|▋         | 364/5500 [03:39<43:59,  1.95it/s, Mean Loss=0.133]

  7%|▋         | 364/5500 [03:39<43:59,  1.95it/s, Mean Loss=0.241]

  7%|▋         | 365/5500 [03:40<43:11,  1.98it/s, Mean Loss=0.241]

  7%|▋         | 365/5500 [03:40<43:11,  1.98it/s, Mean Loss=0.128]

  7%|▋         | 366/5500 [03:40<42:40,  2.01it/s, Mean Loss=0.128]

  7%|▋         | 366/5500 [03:40<42:40,  2.01it/s, Mean Loss=0.11] 

  7%|▋         | 367/5500 [03:41<42:11,  2.03it/s, Mean Loss=0.11]

  7%|▋         | 367/5500 [03:41<42:11,  2.03it/s, Mean Loss=0.157]

  7%|▋         | 368/5500 [03:41<42:24,  2.02it/s, Mean Loss=0.157]

  7%|▋         | 368/5500 [03:41<42:24,  2.02it/s, Mean Loss=0.218]

  7%|▋         | 369/5500 [03:42<41:34,  2.06it/s, Mean Loss=0.218]

  7%|▋         | 369/5500 [03:42<41:34,  2.06it/s, Mean Loss=0.28] 




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 239.94it/s]




  7%|▋         | 370/5500 [03:42<43:38,  1.96it/s, Mean Loss=0.28]

  7%|▋         | 370/5500 [03:42<43:38,  1.96it/s, Mean Loss=0.106]

  7%|▋         | 371/5500 [03:43<42:51,  1.99it/s, Mean Loss=0.106]

  7%|▋         | 371/5500 [03:43<42:51,  1.99it/s, Mean Loss=0.134]

  7%|▋         | 372/5500 [03:43<44:06,  1.94it/s, Mean Loss=0.134]

  7%|▋         | 372/5500 [03:43<44:06,  1.94it/s, Mean Loss=0.176]

  7%|▋         | 373/5500 [03:44<43:46,  1.95it/s, Mean Loss=0.176]

  7%|▋         | 373/5500 [03:44<43:46,  1.95it/s, Mean Loss=0.241]

  7%|▋         | 374/5500 [03:44<43:55,  1.94it/s, Mean Loss=0.241]

  7%|▋         | 374/5500 [03:44<43:55,  1.94it/s, Mean Loss=0.249]

  7%|▋         | 375/5500 [03:45<44:14,  1.93it/s, Mean Loss=0.249]

  7%|▋         | 375/5500 [03:45<44:14,  1.93it/s, Mean Loss=0.105]

  7%|▋         | 376/5500 [03:45<43:52,  1.95it/s, Mean Loss=0.105]

  7%|▋         | 376/5500 [03:45<43:52,  1.95it/s, Mean Loss=0.133]

  7%|▋         | 377/5500 [03:46<44:28,  1.92it/s, Mean Loss=0.133]

  7%|▋         | 377/5500 [03:46<44:28,  1.92it/s, Mean Loss=0.117]

  7%|▋         | 378/5500 [03:46<43:48,  1.95it/s, Mean Loss=0.117]

  7%|▋         | 378/5500 [03:46<43:48,  1.95it/s, Mean Loss=0.242]

  7%|▋         | 379/5500 [03:47<44:44,  1.91it/s, Mean Loss=0.242]

  7%|▋         | 379/5500 [03:47<44:44,  1.91it/s, Mean Loss=0.246]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 222.05it/s]




  7%|▋         | 380/5500 [03:47<46:59,  1.82it/s, Mean Loss=0.246]

  7%|▋         | 380/5500 [03:47<46:59,  1.82it/s, Mean Loss=0.151]

  7%|▋         | 381/5500 [03:48<45:45,  1.86it/s, Mean Loss=0.151]

  7%|▋         | 381/5500 [03:48<45:45,  1.86it/s, Mean Loss=0.149]

  7%|▋         | 382/5500 [03:48<45:16,  1.88it/s, Mean Loss=0.149]

  7%|▋         | 382/5500 [03:48<45:16,  1.88it/s, Mean Loss=0.0778]

  7%|▋         | 383/5500 [03:49<45:18,  1.88it/s, Mean Loss=0.0778]

  7%|▋         | 383/5500 [03:49<45:18,  1.88it/s, Mean Loss=0.147] 

  7%|▋         | 384/5500 [03:49<44:24,  1.92it/s, Mean Loss=0.147]

  7%|▋         | 384/5500 [03:49<44:24,  1.92it/s, Mean Loss=0.184]

  7%|▋         | 385/5500 [03:50<43:54,  1.94it/s, Mean Loss=0.184]

  7%|▋         | 385/5500 [03:50<43:54,  1.94it/s, Mean Loss=0.167]

  7%|▋         | 386/5500 [03:50<44:00,  1.94it/s, Mean Loss=0.167]

  7%|▋         | 386/5500 [03:50<44:00,  1.94it/s, Mean Loss=0.206]

  7%|▋         | 387/5500 [03:51<43:09,  1.97it/s, Mean Loss=0.206]

  7%|▋         | 387/5500 [03:51<43:09,  1.97it/s, Mean Loss=0.179]

  7%|▋         | 388/5500 [03:51<43:23,  1.96it/s, Mean Loss=0.179]

  7%|▋         | 388/5500 [03:51<43:23,  1.96it/s, Mean Loss=0.109]

  7%|▋         | 389/5500 [03:52<43:31,  1.96it/s, Mean Loss=0.109]

  7%|▋         | 389/5500 [03:52<43:31,  1.96it/s, Mean Loss=0.24] 




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 175.81it/s]




  7%|▋         | 390/5500 [03:53<46:06,  1.85it/s, Mean Loss=0.24]

  7%|▋         | 390/5500 [03:53<46:06,  1.85it/s, Mean Loss=0.181]

  7%|▋         | 391/5500 [03:53<45:30,  1.87it/s, Mean Loss=0.181]

  7%|▋         | 391/5500 [03:53<45:30,  1.87it/s, Mean Loss=0.178]

  7%|▋         | 392/5500 [03:54<46:29,  1.83it/s, Mean Loss=0.178]

  7%|▋         | 392/5500 [03:54<46:29,  1.83it/s, Mean Loss=0.137]

  7%|▋         | 393/5500 [03:54<46:03,  1.85it/s, Mean Loss=0.137]

  7%|▋         | 393/5500 [03:54<46:03,  1.85it/s, Mean Loss=0.129]

  7%|▋         | 394/5500 [03:55<45:25,  1.87it/s, Mean Loss=0.129]

  7%|▋         | 394/5500 [03:55<45:25,  1.87it/s, Mean Loss=0.142]

  7%|▋         | 395/5500 [03:55<44:59,  1.89it/s, Mean Loss=0.142]

  7%|▋         | 395/5500 [03:55<44:59,  1.89it/s, Mean Loss=0.154]

  7%|▋         | 396/5500 [03:56<47:12,  1.80it/s, Mean Loss=0.154]

  7%|▋         | 396/5500 [03:56<47:12,  1.80it/s, Mean Loss=0.149]

  7%|▋         | 397/5500 [03:56<46:38,  1.82it/s, Mean Loss=0.149]

  7%|▋         | 397/5500 [03:56<46:38,  1.82it/s, Mean Loss=0.183]

  7%|▋         | 398/5500 [03:57<45:52,  1.85it/s, Mean Loss=0.183]

  7%|▋         | 398/5500 [03:57<45:52,  1.85it/s, Mean Loss=0.176]

  7%|▋         | 399/5500 [03:59<1:18:03,  1.09it/s, Mean Loss=0.176]

  7%|▋         | 399/5500 [03:59<1:18:03,  1.09it/s, Mean Loss=0.127]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 196.56it/s]




  7%|▋         | 400/5500 [03:59<1:09:52,  1.22it/s, Mean Loss=0.127]

  7%|▋         | 400/5500 [03:59<1:09:52,  1.22it/s, Mean Loss=0.0694]

  7%|▋         | 401/5500 [04:00<1:04:36,  1.32it/s, Mean Loss=0.0694]

  7%|▋         | 401/5500 [04:00<1:04:36,  1.32it/s, Mean Loss=0.183] 

  7%|▋         | 402/5500 [04:00<57:47,  1.47it/s, Mean Loss=0.183]  

  7%|▋         | 402/5500 [04:00<57:47,  1.47it/s, Mean Loss=0.0837]

  7%|▋         | 403/5500 [04:01<54:41,  1.55it/s, Mean Loss=0.0837]

  7%|▋         | 403/5500 [04:01<54:41,  1.55it/s, Mean Loss=0.196] 

  7%|▋         | 404/5500 [04:01<51:39,  1.64it/s, Mean Loss=0.196]

  7%|▋         | 404/5500 [04:01<51:39,  1.64it/s, Mean Loss=0.15] 

  7%|▋         | 405/5500 [04:02<49:26,  1.72it/s, Mean Loss=0.15]

  7%|▋         | 405/5500 [04:02<49:26,  1.72it/s, Mean Loss=0.145]

  7%|▋         | 406/5500 [04:03<48:01,  1.77it/s, Mean Loss=0.145]

  7%|▋         | 406/5500 [04:03<48:01,  1.77it/s, Mean Loss=0.241]

  7%|▋         | 407/5500 [04:03<46:49,  1.81it/s, Mean Loss=0.241]

  7%|▋         | 407/5500 [04:03<46:49,  1.81it/s, Mean Loss=0.19] 

  7%|▋         | 408/5500 [04:04<45:55,  1.85it/s, Mean Loss=0.19]

  7%|▋         | 408/5500 [04:04<45:55,  1.85it/s, Mean Loss=0.121]

  7%|▋         | 409/5500 [04:04<45:06,  1.88it/s, Mean Loss=0.121]

  7%|▋         | 409/5500 [04:04<45:06,  1.88it/s, Mean Loss=0.159]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 192.22it/s]




  7%|▋         | 410/5500 [04:05<46:10,  1.84it/s, Mean Loss=0.159]

  7%|▋         | 410/5500 [04:05<46:10,  1.84it/s, Mean Loss=0.218]

  7%|▋         | 411/5500 [04:05<44:57,  1.89it/s, Mean Loss=0.218]

  7%|▋         | 411/5500 [04:05<44:57,  1.89it/s, Mean Loss=0.23] 

  7%|▋         | 412/5500 [04:06<44:39,  1.90it/s, Mean Loss=0.23]

  7%|▋         | 412/5500 [04:06<44:39,  1.90it/s, Mean Loss=0.171]

  8%|▊         | 413/5500 [04:06<44:02,  1.93it/s, Mean Loss=0.171]

  8%|▊         | 413/5500 [04:06<44:02,  1.93it/s, Mean Loss=0.152]

  8%|▊         | 414/5500 [04:07<43:01,  1.97it/s, Mean Loss=0.152]

  8%|▊         | 414/5500 [04:07<43:01,  1.97it/s, Mean Loss=0.186]

  8%|▊         | 415/5500 [04:07<43:38,  1.94it/s, Mean Loss=0.186]

  8%|▊         | 415/5500 [04:07<43:38,  1.94it/s, Mean Loss=0.152]

  8%|▊         | 416/5500 [04:08<45:31,  1.86it/s, Mean Loss=0.152]

  8%|▊         | 416/5500 [04:08<45:31,  1.86it/s, Mean Loss=0.164]

  8%|▊         | 417/5500 [04:08<43:55,  1.93it/s, Mean Loss=0.164]

  8%|▊         | 417/5500 [04:08<43:55,  1.93it/s, Mean Loss=0.182]

  8%|▊         | 418/5500 [04:09<43:39,  1.94it/s, Mean Loss=0.182]

  8%|▊         | 418/5500 [04:09<43:39,  1.94it/s, Mean Loss=0.13] 

  8%|▊         | 419/5500 [04:09<43:42,  1.94it/s, Mean Loss=0.13]

  8%|▊         | 419/5500 [04:09<43:42,  1.94it/s, Mean Loss=0.131]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 197.93it/s]




  8%|▊         | 420/5500 [04:10<45:18,  1.87it/s, Mean Loss=0.131]

  8%|▊         | 420/5500 [04:10<45:18,  1.87it/s, Mean Loss=0.2]  

  8%|▊         | 421/5500 [04:10<44:49,  1.89it/s, Mean Loss=0.2]

  8%|▊         | 421/5500 [04:10<44:49,  1.89it/s, Mean Loss=0.117]

  8%|▊         | 422/5500 [04:11<44:07,  1.92it/s, Mean Loss=0.117]

  8%|▊         | 422/5500 [04:11<44:07,  1.92it/s, Mean Loss=0.17] 

  8%|▊         | 423/5500 [04:11<44:23,  1.91it/s, Mean Loss=0.17]

  8%|▊         | 423/5500 [04:11<44:23,  1.91it/s, Mean Loss=0.148]

  8%|▊         | 424/5500 [04:12<43:34,  1.94it/s, Mean Loss=0.148]

  8%|▊         | 424/5500 [04:12<43:34,  1.94it/s, Mean Loss=0.172]

  8%|▊         | 425/5500 [04:12<42:51,  1.97it/s, Mean Loss=0.172]

  8%|▊         | 425/5500 [04:12<42:51,  1.97it/s, Mean Loss=0.156]

  8%|▊         | 426/5500 [04:13<42:35,  1.99it/s, Mean Loss=0.156]

  8%|▊         | 426/5500 [04:13<42:35,  1.99it/s, Mean Loss=0.154]

  8%|▊         | 427/5500 [04:13<42:28,  1.99it/s, Mean Loss=0.154]

  8%|▊         | 427/5500 [04:13<42:28,  1.99it/s, Mean Loss=0.217]

  8%|▊         | 428/5500 [04:14<42:28,  1.99it/s, Mean Loss=0.217]

  8%|▊         | 428/5500 [04:14<42:28,  1.99it/s, Mean Loss=0.119]

  8%|▊         | 429/5500 [04:14<42:18,  2.00it/s, Mean Loss=0.119]

  8%|▊         | 429/5500 [04:14<42:18,  2.00it/s, Mean Loss=0.0853]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 169.57it/s]




  8%|▊         | 430/5500 [04:15<44:04,  1.92it/s, Mean Loss=0.0853]

  8%|▊         | 430/5500 [04:15<44:04,  1.92it/s, Mean Loss=0.192] 

  8%|▊         | 431/5500 [04:15<43:03,  1.96it/s, Mean Loss=0.192]

  8%|▊         | 431/5500 [04:15<43:03,  1.96it/s, Mean Loss=0.134]

  8%|▊         | 432/5500 [04:16<42:52,  1.97it/s, Mean Loss=0.134]

  8%|▊         | 432/5500 [04:16<42:52,  1.97it/s, Mean Loss=0.12] 

  8%|▊         | 433/5500 [04:16<42:36,  1.98it/s, Mean Loss=0.12]

  8%|▊         | 433/5500 [04:16<42:36,  1.98it/s, Mean Loss=0.0829]

  8%|▊         | 434/5500 [04:17<42:07,  2.00it/s, Mean Loss=0.0829]

  8%|▊         | 434/5500 [04:17<42:07,  2.00it/s, Mean Loss=0.0702]

  8%|▊         | 435/5500 [04:17<42:25,  1.99it/s, Mean Loss=0.0702]

  8%|▊         | 435/5500 [04:17<42:25,  1.99it/s, Mean Loss=0.159] 

  8%|▊         | 436/5500 [04:18<42:27,  1.99it/s, Mean Loss=0.159]

  8%|▊         | 436/5500 [04:18<42:27,  1.99it/s, Mean Loss=0.24] 

  8%|▊         | 437/5500 [04:19<44:01,  1.92it/s, Mean Loss=0.24]

  8%|▊         | 437/5500 [04:19<44:01,  1.92it/s, Mean Loss=0.144]

  8%|▊         | 438/5500 [04:19<43:56,  1.92it/s, Mean Loss=0.144]

  8%|▊         | 438/5500 [04:19<43:56,  1.92it/s, Mean Loss=0.131]

  8%|▊         | 439/5500 [04:20<53:56,  1.56it/s, Mean Loss=0.131]

  8%|▊         | 439/5500 [04:20<53:56,  1.56it/s, Mean Loss=0.256]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 176.12it/s]




  8%|▊         | 440/5500 [04:21<53:20,  1.58it/s, Mean Loss=0.256]

  8%|▊         | 440/5500 [04:21<53:20,  1.58it/s, Mean Loss=0.149]

  8%|▊         | 441/5500 [04:21<50:16,  1.68it/s, Mean Loss=0.149]

  8%|▊         | 441/5500 [04:21<50:16,  1.68it/s, Mean Loss=0.23] 

  8%|▊         | 442/5500 [04:22<48:17,  1.75it/s, Mean Loss=0.23]

  8%|▊         | 442/5500 [04:22<48:17,  1.75it/s, Mean Loss=0.16]

  8%|▊         | 443/5500 [04:22<46:06,  1.83it/s, Mean Loss=0.16]

  8%|▊         | 443/5500 [04:22<46:06,  1.83it/s, Mean Loss=0.231]

  8%|▊         | 444/5500 [04:23<44:28,  1.89it/s, Mean Loss=0.231]

  8%|▊         | 444/5500 [04:23<44:28,  1.89it/s, Mean Loss=0.0936]

  8%|▊         | 445/5500 [04:23<44:30,  1.89it/s, Mean Loss=0.0936]

  8%|▊         | 445/5500 [04:23<44:30,  1.89it/s, Mean Loss=0.0977]

  8%|▊         | 446/5500 [04:24<43:53,  1.92it/s, Mean Loss=0.0977]

  8%|▊         | 446/5500 [04:24<43:53,  1.92it/s, Mean Loss=0.111] 

  8%|▊         | 447/5500 [04:24<43:14,  1.95it/s, Mean Loss=0.111]

  8%|▊         | 447/5500 [04:24<43:14,  1.95it/s, Mean Loss=0.0998]

  8%|▊         | 448/5500 [04:25<43:21,  1.94it/s, Mean Loss=0.0998]

  8%|▊         | 448/5500 [04:25<43:21,  1.94it/s, Mean Loss=0.127] 

  8%|▊         | 449/5500 [04:25<42:56,  1.96it/s, Mean Loss=0.127]

  8%|▊         | 449/5500 [04:25<42:56,  1.96it/s, Mean Loss=0.102]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 208.63it/s]




  8%|▊         | 450/5500 [04:26<45:09,  1.86it/s, Mean Loss=0.102]

  8%|▊         | 450/5500 [04:26<45:09,  1.86it/s, Mean Loss=0.128]

  8%|▊         | 451/5500 [04:26<44:48,  1.88it/s, Mean Loss=0.128]

  8%|▊         | 451/5500 [04:26<44:48,  1.88it/s, Mean Loss=0.196]

  8%|▊         | 452/5500 [04:27<45:41,  1.84it/s, Mean Loss=0.196]

  8%|▊         | 452/5500 [04:27<45:41,  1.84it/s, Mean Loss=0.141]

  8%|▊         | 453/5500 [04:27<45:45,  1.84it/s, Mean Loss=0.141]

  8%|▊         | 453/5500 [04:27<45:45,  1.84it/s, Mean Loss=0.19] 

  8%|▊         | 454/5500 [04:28<46:03,  1.83it/s, Mean Loss=0.19]

  8%|▊         | 454/5500 [04:28<46:03,  1.83it/s, Mean Loss=0.159]

  8%|▊         | 455/5500 [04:28<45:25,  1.85it/s, Mean Loss=0.159]

  8%|▊         | 455/5500 [04:28<45:25,  1.85it/s, Mean Loss=0.169]

  8%|▊         | 456/5500 [04:29<45:34,  1.84it/s, Mean Loss=0.169]

  8%|▊         | 456/5500 [04:29<45:34,  1.84it/s, Mean Loss=0.175]

  8%|▊         | 457/5500 [04:29<45:09,  1.86it/s, Mean Loss=0.175]

  8%|▊         | 457/5500 [04:30<45:09,  1.86it/s, Mean Loss=0.225]

  8%|▊         | 458/5500 [04:30<45:17,  1.86it/s, Mean Loss=0.225]

  8%|▊         | 458/5500 [04:30<45:17,  1.86it/s, Mean Loss=0.112]

  8%|▊         | 459/5500 [04:31<46:05,  1.82it/s, Mean Loss=0.112]

  8%|▊         | 459/5500 [04:31<46:05,  1.82it/s, Mean Loss=0.17] 




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 196.95it/s]




  8%|▊         | 460/5500 [04:31<47:51,  1.76it/s, Mean Loss=0.17]

  8%|▊         | 460/5500 [04:31<47:51,  1.76it/s, Mean Loss=0.136]

  8%|▊         | 461/5500 [04:32<46:55,  1.79it/s, Mean Loss=0.136]

  8%|▊         | 461/5500 [04:32<46:55,  1.79it/s, Mean Loss=0.148]

  8%|▊         | 462/5500 [04:32<46:29,  1.81it/s, Mean Loss=0.148]

  8%|▊         | 462/5500 [04:32<46:29,  1.81it/s, Mean Loss=0.205]

  8%|▊         | 463/5500 [04:33<45:13,  1.86it/s, Mean Loss=0.205]

  8%|▊         | 463/5500 [04:33<45:13,  1.86it/s, Mean Loss=0.193]

  8%|▊         | 464/5500 [04:33<44:09,  1.90it/s, Mean Loss=0.193]

  8%|▊         | 464/5500 [04:33<44:09,  1.90it/s, Mean Loss=0.235]

  8%|▊         | 465/5500 [04:34<43:57,  1.91it/s, Mean Loss=0.235]

  8%|▊         | 465/5500 [04:34<43:57,  1.91it/s, Mean Loss=0.173]

  8%|▊         | 466/5500 [04:34<42:59,  1.95it/s, Mean Loss=0.173]

  8%|▊         | 466/5500 [04:34<42:59,  1.95it/s, Mean Loss=0.184]

  8%|▊         | 467/5500 [04:35<43:03,  1.95it/s, Mean Loss=0.184]

  8%|▊         | 467/5500 [04:35<43:03,  1.95it/s, Mean Loss=0.133]

  9%|▊         | 468/5500 [04:35<43:02,  1.95it/s, Mean Loss=0.133]

  9%|▊         | 468/5500 [04:35<43:02,  1.95it/s, Mean Loss=0.16] 

  9%|▊         | 469/5500 [04:36<42:02,  1.99it/s, Mean Loss=0.16]

  9%|▊         | 469/5500 [04:36<42:02,  1.99it/s, Mean Loss=0.188]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 172.87it/s]




  9%|▊         | 470/5500 [04:36<44:40,  1.88it/s, Mean Loss=0.188]

  9%|▊         | 470/5500 [04:36<44:40,  1.88it/s, Mean Loss=0.143]

  9%|▊         | 471/5500 [04:37<43:49,  1.91it/s, Mean Loss=0.143]

  9%|▊         | 471/5500 [04:37<43:49,  1.91it/s, Mean Loss=0.138]

  9%|▊         | 472/5500 [04:37<43:59,  1.90it/s, Mean Loss=0.138]

  9%|▊         | 472/5500 [04:37<43:59,  1.90it/s, Mean Loss=0.156]

  9%|▊         | 473/5500 [04:38<43:40,  1.92it/s, Mean Loss=0.156]

  9%|▊         | 473/5500 [04:38<43:40,  1.92it/s, Mean Loss=0.17] 

  9%|▊         | 474/5500 [04:38<43:53,  1.91it/s, Mean Loss=0.17]

  9%|▊         | 474/5500 [04:39<43:53,  1.91it/s, Mean Loss=0.144]

  9%|▊         | 475/5500 [04:39<46:23,  1.80it/s, Mean Loss=0.144]

  9%|▊         | 475/5500 [04:39<46:23,  1.80it/s, Mean Loss=0.0733]

  9%|▊         | 476/5500 [04:40<47:26,  1.77it/s, Mean Loss=0.0733]

  9%|▊         | 476/5500 [04:40<47:26,  1.77it/s, Mean Loss=0.225] 

  9%|▊         | 477/5500 [04:40<46:35,  1.80it/s, Mean Loss=0.225]

  9%|▊         | 477/5500 [04:40<46:35,  1.80it/s, Mean Loss=0.129]

  9%|▊         | 478/5500 [04:41<46:20,  1.81it/s, Mean Loss=0.129]

  9%|▊         | 478/5500 [04:41<46:20,  1.81it/s, Mean Loss=0.0931]

  9%|▊         | 479/5500 [04:41<46:11,  1.81it/s, Mean Loss=0.0931]

  9%|▊         | 479/5500 [04:41<46:11,  1.81it/s, Mean Loss=0.176] 




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 175.78it/s]




  9%|▊         | 480/5500 [04:42<49:12,  1.70it/s, Mean Loss=0.176]

  9%|▊         | 480/5500 [04:42<49:12,  1.70it/s, Mean Loss=0.178]

  9%|▊         | 481/5500 [04:43<48:58,  1.71it/s, Mean Loss=0.178]

  9%|▊         | 481/5500 [04:43<48:58,  1.71it/s, Mean Loss=0.225]

  9%|▉         | 482/5500 [04:43<48:24,  1.73it/s, Mean Loss=0.225]

  9%|▉         | 482/5500 [04:43<48:24,  1.73it/s, Mean Loss=0.104]

  9%|▉         | 483/5500 [04:44<47:15,  1.77it/s, Mean Loss=0.104]

  9%|▉         | 483/5500 [04:44<47:15,  1.77it/s, Mean Loss=0.188]

  9%|▉         | 484/5500 [04:44<46:07,  1.81it/s, Mean Loss=0.188]

  9%|▉         | 484/5500 [04:44<46:07,  1.81it/s, Mean Loss=0.21] 

  9%|▉         | 485/5500 [04:45<45:16,  1.85it/s, Mean Loss=0.21]

  9%|▉         | 485/5500 [04:45<45:16,  1.85it/s, Mean Loss=0.0574]

  9%|▉         | 486/5500 [04:45<50:04,  1.67it/s, Mean Loss=0.0574]

  9%|▉         | 486/5500 [04:45<50:04,  1.67it/s, Mean Loss=0.127] 

  9%|▉         | 487/5500 [04:46<49:34,  1.69it/s, Mean Loss=0.127]

  9%|▉         | 487/5500 [04:46<49:34,  1.69it/s, Mean Loss=0.193]

  9%|▉         | 488/5500 [04:47<46:52,  1.78it/s, Mean Loss=0.193]

  9%|▉         | 488/5500 [04:47<46:52,  1.78it/s, Mean Loss=0.214]

  9%|▉         | 489/5500 [04:47<46:05,  1.81it/s, Mean Loss=0.214]

  9%|▉         | 489/5500 [04:47<46:05,  1.81it/s, Mean Loss=0.0969]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 221.52it/s]




  9%|▉         | 490/5500 [04:48<47:40,  1.75it/s, Mean Loss=0.0969]

  9%|▉         | 490/5500 [04:48<47:40,  1.75it/s, Mean Loss=0.175] 

  9%|▉         | 491/5500 [04:48<45:46,  1.82it/s, Mean Loss=0.175]

  9%|▉         | 491/5500 [04:48<45:46,  1.82it/s, Mean Loss=0.151]

  9%|▉         | 492/5500 [04:49<44:37,  1.87it/s, Mean Loss=0.151]

  9%|▉         | 492/5500 [04:49<44:37,  1.87it/s, Mean Loss=0.11] 

  9%|▉         | 493/5500 [04:49<44:14,  1.89it/s, Mean Loss=0.11]

  9%|▉         | 493/5500 [04:49<44:14,  1.89it/s, Mean Loss=0.132]

  9%|▉         | 494/5500 [04:50<44:54,  1.86it/s, Mean Loss=0.132]

  9%|▉         | 494/5500 [04:50<44:54,  1.86it/s, Mean Loss=0.117]

  9%|▉         | 495/5500 [04:50<44:28,  1.88it/s, Mean Loss=0.117]

  9%|▉         | 495/5500 [04:50<44:28,  1.88it/s, Mean Loss=0.127]

  9%|▉         | 496/5500 [04:51<44:29,  1.87it/s, Mean Loss=0.127]

  9%|▉         | 496/5500 [04:51<44:29,  1.87it/s, Mean Loss=0.147]

  9%|▉         | 497/5500 [04:51<44:07,  1.89it/s, Mean Loss=0.147]

  9%|▉         | 497/5500 [04:51<44:07,  1.89it/s, Mean Loss=0.153]

  9%|▉         | 498/5500 [04:52<46:18,  1.80it/s, Mean Loss=0.153]

  9%|▉         | 498/5500 [04:52<46:18,  1.80it/s, Mean Loss=0.123]

  9%|▉         | 499/5500 [04:52<45:14,  1.84it/s, Mean Loss=0.123]

  9%|▉         | 499/5500 [04:52<45:14,  1.84it/s, Mean Loss=0.17] 




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 220.60it/s]




  9%|▉         | 500/5500 [04:53<47:53,  1.74it/s, Mean Loss=0.17]

  9%|▉         | 500/5500 [04:53<47:53,  1.74it/s, Mean Loss=0.106]

  9%|▉         | 501/5500 [04:54<47:59,  1.74it/s, Mean Loss=0.106]

  9%|▉         | 501/5500 [04:54<47:59,  1.74it/s, Mean Loss=0.15] 

  9%|▉         | 502/5500 [04:54<47:13,  1.76it/s, Mean Loss=0.15]

  9%|▉         | 502/5500 [04:54<47:13,  1.76it/s, Mean Loss=0.101]

  9%|▉         | 503/5500 [04:55<46:08,  1.80it/s, Mean Loss=0.101]

  9%|▉         | 503/5500 [04:55<46:08,  1.80it/s, Mean Loss=0.16] 

  9%|▉         | 504/5500 [04:55<46:12,  1.80it/s, Mean Loss=0.16]

  9%|▉         | 504/5500 [04:55<46:12,  1.80it/s, Mean Loss=0.105]

  9%|▉         | 505/5500 [04:56<46:47,  1.78it/s, Mean Loss=0.105]

  9%|▉         | 505/5500 [04:56<46:47,  1.78it/s, Mean Loss=0.208]

  9%|▉         | 506/5500 [04:58<1:25:29,  1.03s/it, Mean Loss=0.208]

  9%|▉         | 506/5500 [04:58<1:25:29,  1.03s/it, Mean Loss=0.12] 

  9%|▉         | 507/5500 [04:58<1:12:01,  1.16it/s, Mean Loss=0.12]

  9%|▉         | 507/5500 [04:58<1:12:01,  1.16it/s, Mean Loss=0.0598]

  9%|▉         | 508/5500 [04:59<1:03:42,  1.31it/s, Mean Loss=0.0598]

  9%|▉         | 508/5500 [04:59<1:03:42,  1.31it/s, Mean Loss=0.211] 

  9%|▉         | 509/5500 [05:00<58:41,  1.42it/s, Mean Loss=0.211]  

  9%|▉         | 509/5500 [05:00<58:41,  1.42it/s, Mean Loss=0.133]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 169.62it/s]




  9%|▉         | 510/5500 [05:00<57:48,  1.44it/s, Mean Loss=0.133]

  9%|▉         | 510/5500 [05:00<57:48,  1.44it/s, Mean Loss=0.212]

  9%|▉         | 511/5500 [05:01<55:27,  1.50it/s, Mean Loss=0.212]

  9%|▉         | 511/5500 [05:01<55:27,  1.50it/s, Mean Loss=0.101]

  9%|▉         | 512/5500 [05:02<55:04,  1.51it/s, Mean Loss=0.101]

  9%|▉         | 512/5500 [05:02<55:04,  1.51it/s, Mean Loss=0.181]

  9%|▉         | 513/5500 [05:02<54:04,  1.54it/s, Mean Loss=0.181]

  9%|▉         | 513/5500 [05:02<54:04,  1.54it/s, Mean Loss=0.196]

  9%|▉         | 514/5500 [05:03<53:15,  1.56it/s, Mean Loss=0.196]

  9%|▉         | 514/5500 [05:03<53:15,  1.56it/s, Mean Loss=0.123]

  9%|▉         | 515/5500 [05:03<52:19,  1.59it/s, Mean Loss=0.123]

  9%|▉         | 515/5500 [05:03<52:19,  1.59it/s, Mean Loss=0.108]

  9%|▉         | 516/5500 [05:04<49:52,  1.67it/s, Mean Loss=0.108]

  9%|▉         | 516/5500 [05:04<49:52,  1.67it/s, Mean Loss=0.154]

  9%|▉         | 517/5500 [05:04<48:33,  1.71it/s, Mean Loss=0.154]

  9%|▉         | 517/5500 [05:04<48:33,  1.71it/s, Mean Loss=0.191]

  9%|▉         | 518/5500 [05:05<47:45,  1.74it/s, Mean Loss=0.191]

  9%|▉         | 518/5500 [05:05<47:45,  1.74it/s, Mean Loss=0.18] 

  9%|▉         | 519/5500 [05:06<46:52,  1.77it/s, Mean Loss=0.18]

  9%|▉         | 519/5500 [05:06<46:52,  1.77it/s, Mean Loss=0.137]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 152.68it/s]




  9%|▉         | 520/5500 [05:06<48:30,  1.71it/s, Mean Loss=0.137]

  9%|▉         | 520/5500 [05:06<48:30,  1.71it/s, Mean Loss=0.139]

  9%|▉         | 521/5500 [05:07<47:53,  1.73it/s, Mean Loss=0.139]

  9%|▉         | 521/5500 [05:07<47:53,  1.73it/s, Mean Loss=0.194]

  9%|▉         | 522/5500 [05:07<46:40,  1.78it/s, Mean Loss=0.194]

  9%|▉         | 522/5500 [05:07<46:40,  1.78it/s, Mean Loss=0.136]

 10%|▉         | 523/5500 [05:08<46:02,  1.80it/s, Mean Loss=0.136]

 10%|▉         | 523/5500 [05:08<46:02,  1.80it/s, Mean Loss=0.119]

 10%|▉         | 524/5500 [05:08<46:03,  1.80it/s, Mean Loss=0.119]

 10%|▉         | 524/5500 [05:08<46:03,  1.80it/s, Mean Loss=0.137]

 10%|▉         | 525/5500 [05:09<45:37,  1.82it/s, Mean Loss=0.137]

 10%|▉         | 525/5500 [05:09<45:37,  1.82it/s, Mean Loss=0.152]

 10%|▉         | 526/5500 [05:09<46:50,  1.77it/s, Mean Loss=0.152]

 10%|▉         | 526/5500 [05:09<46:50,  1.77it/s, Mean Loss=0.147]

 10%|▉         | 527/5500 [05:10<46:04,  1.80it/s, Mean Loss=0.147]

 10%|▉         | 527/5500 [05:10<46:04,  1.80it/s, Mean Loss=0.187]

 10%|▉         | 528/5500 [05:11<45:43,  1.81it/s, Mean Loss=0.187]

 10%|▉         | 528/5500 [05:11<45:43,  1.81it/s, Mean Loss=0.189]

 10%|▉         | 529/5500 [05:11<45:32,  1.82it/s, Mean Loss=0.189]

 10%|▉         | 529/5500 [05:11<45:32,  1.82it/s, Mean Loss=0.133]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 169.59it/s]




 10%|▉         | 530/5500 [05:12<47:37,  1.74it/s, Mean Loss=0.133]

 10%|▉         | 530/5500 [05:12<47:37,  1.74it/s, Mean Loss=0.0635]

 10%|▉         | 531/5500 [05:12<46:39,  1.78it/s, Mean Loss=0.0635]

 10%|▉         | 531/5500 [05:12<46:39,  1.78it/s, Mean Loss=0.185] 

 10%|▉         | 532/5500 [05:13<46:16,  1.79it/s, Mean Loss=0.185]

 10%|▉         | 532/5500 [05:13<46:16,  1.79it/s, Mean Loss=0.115]

 10%|▉         | 533/5500 [05:13<46:23,  1.78it/s, Mean Loss=0.115]

 10%|▉         | 533/5500 [05:13<46:23,  1.78it/s, Mean Loss=0.217]

 10%|▉         | 534/5500 [05:14<46:33,  1.78it/s, Mean Loss=0.217]

 10%|▉         | 534/5500 [05:14<46:33,  1.78it/s, Mean Loss=0.154]

 10%|▉         | 535/5500 [05:14<45:47,  1.81it/s, Mean Loss=0.154]

 10%|▉         | 535/5500 [05:14<45:47,  1.81it/s, Mean Loss=0.148]

 10%|▉         | 536/5500 [05:15<45:17,  1.83it/s, Mean Loss=0.148]

 10%|▉         | 536/5500 [05:15<45:17,  1.83it/s, Mean Loss=0.157]

 10%|▉         | 537/5500 [05:16<45:12,  1.83it/s, Mean Loss=0.157]

 10%|▉         | 537/5500 [05:16<45:12,  1.83it/s, Mean Loss=0.144]

 10%|▉         | 538/5500 [05:16<48:21,  1.71it/s, Mean Loss=0.144]

 10%|▉         | 538/5500 [05:16<48:21,  1.71it/s, Mean Loss=0.0782]

 10%|▉         | 539/5500 [05:17<50:24,  1.64it/s, Mean Loss=0.0782]

 10%|▉         | 539/5500 [05:17<50:24,  1.64it/s, Mean Loss=0.182] 




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A




Evaluation: 100%|██████████| 15/15 [00:00<00:00, 132.24it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 127.89it/s]




 10%|▉         | 540/5500 [05:18<51:37,  1.60it/s, Mean Loss=0.182]

 10%|▉         | 540/5500 [05:18<51:37,  1.60it/s, Mean Loss=0.178]

 10%|▉         | 541/5500 [05:18<51:46,  1.60it/s, Mean Loss=0.178]

 10%|▉         | 541/5500 [05:18<51:46,  1.60it/s, Mean Loss=0.124]

 10%|▉         | 542/5500 [05:19<49:57,  1.65it/s, Mean Loss=0.124]

 10%|▉         | 542/5500 [05:19<49:57,  1.65it/s, Mean Loss=0.0979]

 10%|▉         | 543/5500 [05:19<48:38,  1.70it/s, Mean Loss=0.0979]

 10%|▉         | 543/5500 [05:19<48:38,  1.70it/s, Mean Loss=0.152] 

 10%|▉         | 544/5500 [05:20<48:07,  1.72it/s, Mean Loss=0.152]

 10%|▉         | 544/5500 [05:20<48:07,  1.72it/s, Mean Loss=0.12] 

 10%|▉         | 545/5500 [05:20<48:08,  1.72it/s, Mean Loss=0.12]

 10%|▉         | 545/5500 [05:20<48:08,  1.72it/s, Mean Loss=0.117]

 10%|▉         | 546/5500 [05:21<48:07,  1.72it/s, Mean Loss=0.117]

 10%|▉         | 546/5500 [05:21<48:07,  1.72it/s, Mean Loss=0.128]

 10%|▉         | 547/5500 [05:22<46:24,  1.78it/s, Mean Loss=0.128]

 10%|▉         | 547/5500 [05:22<46:24,  1.78it/s, Mean Loss=0.238]

 10%|▉         | 548/5500 [05:22<46:33,  1.77it/s, Mean Loss=0.238]

 10%|▉         | 548/5500 [05:22<46:33,  1.77it/s, Mean Loss=0.157]

 10%|▉         | 549/5500 [05:23<46:40,  1.77it/s, Mean Loss=0.157]

 10%|▉         | 549/5500 [05:23<46:40,  1.77it/s, Mean Loss=0.244]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 190.40it/s]




 10%|█         | 550/5500 [05:23<49:54,  1.65it/s, Mean Loss=0.244]

 10%|█         | 550/5500 [05:23<49:54,  1.65it/s, Mean Loss=0.185]

 10%|█         | 551/5500 [05:24<50:34,  1.63it/s, Mean Loss=0.185]

 10%|█         | 551/5500 [05:24<50:34,  1.63it/s, Mean Loss=0.0767]

 10%|█         | 552/5500 [05:25<49:23,  1.67it/s, Mean Loss=0.0767]

 10%|█         | 552/5500 [05:25<49:23,  1.67it/s, Mean Loss=0.126] 

 10%|█         | 553/5500 [05:25<47:21,  1.74it/s, Mean Loss=0.126]

 10%|█         | 553/5500 [05:25<47:21,  1.74it/s, Mean Loss=0.167]

 10%|█         | 554/5500 [05:26<47:52,  1.72it/s, Mean Loss=0.167]

 10%|█         | 554/5500 [05:26<47:52,  1.72it/s, Mean Loss=0.171]

 10%|█         | 555/5500 [05:26<49:12,  1.67it/s, Mean Loss=0.171]

 10%|█         | 555/5500 [05:26<49:12,  1.67it/s, Mean Loss=0.158]

 10%|█         | 556/5500 [05:27<48:32,  1.70it/s, Mean Loss=0.158]

 10%|█         | 556/5500 [05:27<48:32,  1.70it/s, Mean Loss=0.079]

 10%|█         | 557/5500 [05:27<48:21,  1.70it/s, Mean Loss=0.079]

 10%|█         | 557/5500 [05:27<48:21,  1.70it/s, Mean Loss=0.147]

 10%|█         | 558/5500 [05:28<49:04,  1.68it/s, Mean Loss=0.147]

 10%|█         | 558/5500 [05:28<49:04,  1.68it/s, Mean Loss=0.129]

 10%|█         | 559/5500 [05:29<47:53,  1.72it/s, Mean Loss=0.129]

 10%|█         | 559/5500 [05:29<47:53,  1.72it/s, Mean Loss=0.126]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 187.48it/s]




 10%|█         | 560/5500 [05:29<48:57,  1.68it/s, Mean Loss=0.126]

 10%|█         | 560/5500 [05:29<48:57,  1.68it/s, Mean Loss=0.212]

 10%|█         | 561/5500 [05:30<48:53,  1.68it/s, Mean Loss=0.212]

 10%|█         | 561/5500 [05:30<48:53,  1.68it/s, Mean Loss=0.151]

 10%|█         | 562/5500 [05:30<47:45,  1.72it/s, Mean Loss=0.151]

 10%|█         | 562/5500 [05:30<47:45,  1.72it/s, Mean Loss=0.192]

 10%|█         | 563/5500 [05:31<47:00,  1.75it/s, Mean Loss=0.192]

 10%|█         | 563/5500 [05:31<47:00,  1.75it/s, Mean Loss=0.171]

 10%|█         | 564/5500 [05:32<46:31,  1.77it/s, Mean Loss=0.171]

 10%|█         | 564/5500 [05:32<46:31,  1.77it/s, Mean Loss=0.163]

 10%|█         | 565/5500 [05:32<47:00,  1.75it/s, Mean Loss=0.163]

 10%|█         | 565/5500 [05:32<47:00,  1.75it/s, Mean Loss=0.123]

 10%|█         | 566/5500 [05:33<47:34,  1.73it/s, Mean Loss=0.123]

 10%|█         | 566/5500 [05:33<47:34,  1.73it/s, Mean Loss=0.123]

 10%|█         | 567/5500 [05:33<49:22,  1.66it/s, Mean Loss=0.123]

 10%|█         | 567/5500 [05:33<49:22,  1.66it/s, Mean Loss=0.148]

 10%|█         | 568/5500 [05:34<48:05,  1.71it/s, Mean Loss=0.148]

 10%|█         | 568/5500 [05:34<48:05,  1.71it/s, Mean Loss=0.127]

 10%|█         | 569/5500 [05:34<46:46,  1.76it/s, Mean Loss=0.127]

 10%|█         | 569/5500 [05:34<46:46,  1.76it/s, Mean Loss=0.227]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 168.46it/s]




 10%|█         | 570/5500 [05:35<50:37,  1.62it/s, Mean Loss=0.227]

 10%|█         | 570/5500 [05:35<50:37,  1.62it/s, Mean Loss=0.137]

 10%|█         | 571/5500 [05:36<53:06,  1.55it/s, Mean Loss=0.137]

 10%|█         | 571/5500 [05:36<53:06,  1.55it/s, Mean Loss=0.0589]

 10%|█         | 572/5500 [05:37<53:42,  1.53it/s, Mean Loss=0.0589]

 10%|█         | 572/5500 [05:37<53:42,  1.53it/s, Mean Loss=0.133] 

 10%|█         | 573/5500 [05:37<52:44,  1.56it/s, Mean Loss=0.133]

 10%|█         | 573/5500 [05:37<52:44,  1.56it/s, Mean Loss=0.168]

 10%|█         | 574/5500 [05:38<50:54,  1.61it/s, Mean Loss=0.168]

 10%|█         | 574/5500 [05:38<50:54,  1.61it/s, Mean Loss=0.206]

 10%|█         | 575/5500 [05:38<49:54,  1.64it/s, Mean Loss=0.206]

 10%|█         | 575/5500 [05:38<49:54,  1.64it/s, Mean Loss=0.119]

 10%|█         | 576/5500 [05:39<52:58,  1.55it/s, Mean Loss=0.119]

 10%|█         | 576/5500 [05:39<52:58,  1.55it/s, Mean Loss=0.197]

 10%|█         | 577/5500 [05:40<53:10,  1.54it/s, Mean Loss=0.197]

 10%|█         | 577/5500 [05:40<53:10,  1.54it/s, Mean Loss=0.17] 

 11%|█         | 578/5500 [05:40<52:02,  1.58it/s, Mean Loss=0.17]

 11%|█         | 578/5500 [05:40<52:02,  1.58it/s, Mean Loss=0.0615]

 11%|█         | 579/5500 [05:41<49:58,  1.64it/s, Mean Loss=0.0615]

 11%|█         | 579/5500 [05:41<49:58,  1.64it/s, Mean Loss=0.158] 




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 158.58it/s]




 11%|█         | 580/5500 [05:42<51:23,  1.60it/s, Mean Loss=0.158]

 11%|█         | 580/5500 [05:42<51:23,  1.60it/s, Mean Loss=0.138]

 11%|█         | 581/5500 [05:42<51:02,  1.61it/s, Mean Loss=0.138]

 11%|█         | 581/5500 [05:42<51:02,  1.61it/s, Mean Loss=0.204]

 11%|█         | 582/5500 [05:43<49:34,  1.65it/s, Mean Loss=0.204]

 11%|█         | 582/5500 [05:43<49:34,  1.65it/s, Mean Loss=0.236]

 11%|█         | 583/5500 [05:43<50:13,  1.63it/s, Mean Loss=0.236]

 11%|█         | 583/5500 [05:43<50:13,  1.63it/s, Mean Loss=0.203]

 11%|█         | 584/5500 [05:44<51:39,  1.59it/s, Mean Loss=0.203]

 11%|█         | 584/5500 [05:44<51:39,  1.59it/s, Mean Loss=0.131]

 11%|█         | 585/5500 [05:45<50:12,  1.63it/s, Mean Loss=0.131]

 11%|█         | 585/5500 [05:45<50:12,  1.63it/s, Mean Loss=0.146]

 11%|█         | 586/5500 [05:45<49:17,  1.66it/s, Mean Loss=0.146]

 11%|█         | 586/5500 [05:45<49:17,  1.66it/s, Mean Loss=0.219]

 11%|█         | 587/5500 [05:46<47:35,  1.72it/s, Mean Loss=0.219]

 11%|█         | 587/5500 [05:46<47:35,  1.72it/s, Mean Loss=0.131]

 11%|█         | 588/5500 [05:46<48:41,  1.68it/s, Mean Loss=0.131]

 11%|█         | 588/5500 [05:46<48:41,  1.68it/s, Mean Loss=0.209]

 11%|█         | 589/5500 [05:47<47:32,  1.72it/s, Mean Loss=0.209]

 11%|█         | 589/5500 [05:47<47:32,  1.72it/s, Mean Loss=0.217]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 175.96it/s]




 11%|█         | 590/5500 [05:48<50:11,  1.63it/s, Mean Loss=0.217]

 11%|█         | 590/5500 [05:48<50:11,  1.63it/s, Mean Loss=0.0975]

 11%|█         | 591/5500 [05:48<48:30,  1.69it/s, Mean Loss=0.0975]

 11%|█         | 591/5500 [05:48<48:30,  1.69it/s, Mean Loss=0.213] 

 11%|█         | 592/5500 [05:49<47:35,  1.72it/s, Mean Loss=0.213]

 11%|█         | 592/5500 [05:49<47:35,  1.72it/s, Mean Loss=0.132]

 11%|█         | 593/5500 [05:49<47:40,  1.72it/s, Mean Loss=0.132]

 11%|█         | 593/5500 [05:49<47:40,  1.72it/s, Mean Loss=0.0898]

 11%|█         | 594/5500 [05:50<47:58,  1.70it/s, Mean Loss=0.0898]

 11%|█         | 594/5500 [05:50<47:58,  1.70it/s, Mean Loss=0.149] 

 11%|█         | 595/5500 [05:50<45:51,  1.78it/s, Mean Loss=0.149]

 11%|█         | 595/5500 [05:50<45:51,  1.78it/s, Mean Loss=0.21] 

 11%|█         | 596/5500 [05:51<45:09,  1.81it/s, Mean Loss=0.21]

 11%|█         | 596/5500 [05:51<45:09,  1.81it/s, Mean Loss=0.238]

 11%|█         | 597/5500 [05:51<45:04,  1.81it/s, Mean Loss=0.238]

 11%|█         | 597/5500 [05:51<45:04,  1.81it/s, Mean Loss=0.155]

 11%|█         | 598/5500 [05:52<45:38,  1.79it/s, Mean Loss=0.155]

 11%|█         | 598/5500 [05:52<45:38,  1.79it/s, Mean Loss=0.114]

 11%|█         | 599/5500 [05:53<45:31,  1.79it/s, Mean Loss=0.114]

 11%|█         | 599/5500 [05:53<45:31,  1.79it/s, Mean Loss=0.117]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 162.98it/s]




 11%|█         | 600/5500 [05:53<48:00,  1.70it/s, Mean Loss=0.117]

 11%|█         | 600/5500 [05:53<48:00,  1.70it/s, Mean Loss=0.169]

 11%|█         | 601/5500 [05:54<45:43,  1.79it/s, Mean Loss=0.169]

 11%|█         | 601/5500 [05:54<45:43,  1.79it/s, Mean Loss=0.145]

 11%|█         | 602/5500 [05:54<47:00,  1.74it/s, Mean Loss=0.145]

 11%|█         | 602/5500 [05:54<47:00,  1.74it/s, Mean Loss=0.202]

 11%|█         | 603/5500 [05:55<46:29,  1.76it/s, Mean Loss=0.202]

 11%|█         | 603/5500 [05:55<46:29,  1.76it/s, Mean Loss=0.232]

 11%|█         | 604/5500 [05:55<45:46,  1.78it/s, Mean Loss=0.232]

 11%|█         | 604/5500 [05:55<45:46,  1.78it/s, Mean Loss=0.206]

 11%|█         | 605/5500 [05:56<45:34,  1.79it/s, Mean Loss=0.206]

 11%|█         | 605/5500 [05:56<45:34,  1.79it/s, Mean Loss=0.167]

 11%|█         | 606/5500 [05:57<45:48,  1.78it/s, Mean Loss=0.167]

 11%|█         | 606/5500 [05:57<45:48,  1.78it/s, Mean Loss=0.166]

 11%|█         | 607/5500 [05:57<44:46,  1.82it/s, Mean Loss=0.166]

 11%|█         | 607/5500 [05:57<44:46,  1.82it/s, Mean Loss=0.113]

 11%|█         | 608/5500 [05:58<43:48,  1.86it/s, Mean Loss=0.113]

 11%|█         | 608/5500 [05:58<43:48,  1.86it/s, Mean Loss=0.211]

 11%|█         | 609/5500 [05:58<43:17,  1.88it/s, Mean Loss=0.211]

 11%|█         | 609/5500 [05:58<43:17,  1.88it/s, Mean Loss=0.138]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 201.44it/s]




 11%|█         | 610/5500 [05:59<45:52,  1.78it/s, Mean Loss=0.138]

 11%|█         | 610/5500 [05:59<45:52,  1.78it/s, Mean Loss=0.208]

 11%|█         | 611/5500 [05:59<44:45,  1.82it/s, Mean Loss=0.208]

 11%|█         | 611/5500 [05:59<44:45,  1.82it/s, Mean Loss=0.132]

 11%|█         | 612/5500 [06:00<43:58,  1.85it/s, Mean Loss=0.132]

 11%|█         | 612/5500 [06:00<43:58,  1.85it/s, Mean Loss=0.185]

 11%|█         | 613/5500 [06:00<44:13,  1.84it/s, Mean Loss=0.185]

 11%|█         | 613/5500 [06:00<44:13,  1.84it/s, Mean Loss=0.168]

 11%|█         | 614/5500 [06:01<43:58,  1.85it/s, Mean Loss=0.168]

 11%|█         | 614/5500 [06:01<43:58,  1.85it/s, Mean Loss=0.224]

 11%|█         | 615/5500 [06:01<42:54,  1.90it/s, Mean Loss=0.224]

 11%|█         | 615/5500 [06:01<42:54,  1.90it/s, Mean Loss=0.161]

 11%|█         | 616/5500 [06:02<43:02,  1.89it/s, Mean Loss=0.161]

 11%|█         | 616/5500 [06:02<43:02,  1.89it/s, Mean Loss=0.11] 

 11%|█         | 617/5500 [06:02<42:24,  1.92it/s, Mean Loss=0.11]

 11%|█         | 617/5500 [06:02<42:24,  1.92it/s, Mean Loss=0.147]

 11%|█         | 618/5500 [06:03<43:31,  1.87it/s, Mean Loss=0.147]

 11%|█         | 618/5500 [06:03<43:31,  1.87it/s, Mean Loss=0.248]

 11%|█▏        | 619/5500 [06:03<44:24,  1.83it/s, Mean Loss=0.248]

 11%|█▏        | 619/5500 [06:03<44:24,  1.83it/s, Mean Loss=0.223]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 206.33it/s]




 11%|█▏        | 620/5500 [06:04<47:13,  1.72it/s, Mean Loss=0.223]

 11%|█▏        | 620/5500 [06:04<47:13,  1.72it/s, Mean Loss=0.0648]

 11%|█▏        | 621/5500 [06:05<46:31,  1.75it/s, Mean Loss=0.0648]

 11%|█▏        | 621/5500 [06:05<46:31,  1.75it/s, Mean Loss=0.145] 

 11%|█▏        | 622/5500 [06:05<46:17,  1.76it/s, Mean Loss=0.145]

 11%|█▏        | 622/5500 [06:05<46:17,  1.76it/s, Mean Loss=0.232]

 11%|█▏        | 623/5500 [06:06<45:38,  1.78it/s, Mean Loss=0.232]

 11%|█▏        | 623/5500 [06:06<45:38,  1.78it/s, Mean Loss=0.172]

 11%|█▏        | 624/5500 [06:06<44:11,  1.84it/s, Mean Loss=0.172]

 11%|█▏        | 624/5500 [06:06<44:11,  1.84it/s, Mean Loss=0.244]

 11%|█▏        | 625/5500 [06:07<43:55,  1.85it/s, Mean Loss=0.244]

 11%|█▏        | 625/5500 [06:07<43:55,  1.85it/s, Mean Loss=0.135]

 11%|█▏        | 626/5500 [06:07<43:43,  1.86it/s, Mean Loss=0.135]

 11%|█▏        | 626/5500 [06:07<43:43,  1.86it/s, Mean Loss=0.23] 

 11%|█▏        | 627/5500 [06:08<43:38,  1.86it/s, Mean Loss=0.23]

 11%|█▏        | 627/5500 [06:08<43:38,  1.86it/s, Mean Loss=0.106]

 11%|█▏        | 628/5500 [06:08<44:00,  1.85it/s, Mean Loss=0.106]

 11%|█▏        | 628/5500 [06:08<44:00,  1.85it/s, Mean Loss=0.179]

 11%|█▏        | 629/5500 [06:09<44:32,  1.82it/s, Mean Loss=0.179]

 11%|█▏        | 629/5500 [06:09<44:32,  1.82it/s, Mean Loss=0.137]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 173.48it/s]




 11%|█▏        | 630/5500 [06:10<46:57,  1.73it/s, Mean Loss=0.137]

 11%|█▏        | 630/5500 [06:10<46:57,  1.73it/s, Mean Loss=0.0691]

 11%|█▏        | 631/5500 [06:10<46:21,  1.75it/s, Mean Loss=0.0691]

 11%|█▏        | 631/5500 [06:10<46:21,  1.75it/s, Mean Loss=0.101] 

 11%|█▏        | 632/5500 [06:11<47:07,  1.72it/s, Mean Loss=0.101]

 11%|█▏        | 632/5500 [06:11<47:07,  1.72it/s, Mean Loss=0.214]

 12%|█▏        | 633/5500 [06:11<47:47,  1.70it/s, Mean Loss=0.214]

 12%|█▏        | 633/5500 [06:11<47:47,  1.70it/s, Mean Loss=0.107]

 12%|█▏        | 634/5500 [06:14<1:35:33,  1.18s/it, Mean Loss=0.107]

 12%|█▏        | 634/5500 [06:14<1:35:33,  1.18s/it, Mean Loss=0.226]

 12%|█▏        | 635/5500 [06:15<1:23:01,  1.02s/it, Mean Loss=0.226]

 12%|█▏        | 635/5500 [06:15<1:23:01,  1.02s/it, Mean Loss=0.184]

 12%|█▏        | 636/5500 [06:15<1:17:41,  1.04it/s, Mean Loss=0.184]

 12%|█▏        | 636/5500 [06:15<1:17:41,  1.04it/s, Mean Loss=0.169]

 12%|█▏        | 637/5500 [06:16<1:08:13,  1.19it/s, Mean Loss=0.169]

 12%|█▏        | 637/5500 [06:16<1:08:13,  1.19it/s, Mean Loss=0.162]

 12%|█▏        | 638/5500 [06:17<1:04:11,  1.26it/s, Mean Loss=0.162]

 12%|█▏        | 638/5500 [06:17<1:04:11,  1.26it/s, Mean Loss=0.115]

 12%|█▏        | 639/5500 [06:17<1:02:54,  1.29it/s, Mean Loss=0.115]

 12%|█▏        | 639/5500 [06:17<1:02:54,  1.29it/s, Mean Loss=0.186]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 189.95it/s]




 12%|█▏        | 640/5500 [06:18<1:03:15,  1.28it/s, Mean Loss=0.186]

 12%|█▏        | 640/5500 [06:18<1:03:15,  1.28it/s, Mean Loss=0.116]

 12%|█▏        | 641/5500 [06:19<57:12,  1.42it/s, Mean Loss=0.116]  

 12%|█▏        | 641/5500 [06:19<57:12,  1.42it/s, Mean Loss=0.167]

 12%|█▏        | 642/5500 [06:20<1:00:46,  1.33it/s, Mean Loss=0.167]

 12%|█▏        | 642/5500 [06:20<1:00:46,  1.33it/s, Mean Loss=0.144]

 12%|█▏        | 643/5500 [06:20<58:44,  1.38it/s, Mean Loss=0.144]  

 12%|█▏        | 643/5500 [06:20<58:44,  1.38it/s, Mean Loss=0.267]

 12%|█▏        | 644/5500 [06:21<1:03:44,  1.27it/s, Mean Loss=0.267]

 12%|█▏        | 644/5500 [06:21<1:03:44,  1.27it/s, Mean Loss=0.114]

 12%|█▏        | 645/5500 [06:22<58:23,  1.39it/s, Mean Loss=0.114]  

 12%|█▏        | 645/5500 [06:22<58:23,  1.39it/s, Mean Loss=0.125]

 12%|█▏        | 646/5500 [06:23<1:09:07,  1.17it/s, Mean Loss=0.125]

 12%|█▏        | 646/5500 [06:23<1:09:07,  1.17it/s, Mean Loss=0.268]

 12%|█▏        | 647/5500 [06:24<1:03:42,  1.27it/s, Mean Loss=0.268]

 12%|█▏        | 647/5500 [06:24<1:03:42,  1.27it/s, Mean Loss=0.0521]

 12%|█▏        | 648/5500 [06:24<57:07,  1.42it/s, Mean Loss=0.0521]  

 12%|█▏        | 648/5500 [06:24<57:07,  1.42it/s, Mean Loss=0.122] 

 12%|█▏        | 649/5500 [06:25<53:34,  1.51it/s, Mean Loss=0.122]

 12%|█▏        | 649/5500 [06:25<53:34,  1.51it/s, Mean Loss=0.138]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 162.47it/s]




 12%|█▏        | 650/5500 [06:25<55:49,  1.45it/s, Mean Loss=0.138]

 12%|█▏        | 650/5500 [06:25<55:49,  1.45it/s, Mean Loss=0.146]

 12%|█▏        | 651/5500 [06:26<53:14,  1.52it/s, Mean Loss=0.146]

 12%|█▏        | 651/5500 [06:26<53:14,  1.52it/s, Mean Loss=0.202]

 12%|█▏        | 652/5500 [06:27<50:56,  1.59it/s, Mean Loss=0.202]

 12%|█▏        | 652/5500 [06:27<50:56,  1.59it/s, Mean Loss=0.155]

 12%|█▏        | 653/5500 [06:27<48:58,  1.65it/s, Mean Loss=0.155]

 12%|█▏        | 653/5500 [06:27<48:58,  1.65it/s, Mean Loss=0.149]

 12%|█▏        | 654/5500 [06:28<47:53,  1.69it/s, Mean Loss=0.149]

 12%|█▏        | 654/5500 [06:28<47:53,  1.69it/s, Mean Loss=0.092]

 12%|█▏        | 655/5500 [06:28<47:22,  1.70it/s, Mean Loss=0.092]

 12%|█▏        | 655/5500 [06:28<47:22,  1.70it/s, Mean Loss=0.135]

 12%|█▏        | 656/5500 [06:29<47:35,  1.70it/s, Mean Loss=0.135]

 12%|█▏        | 656/5500 [06:29<47:35,  1.70it/s, Mean Loss=0.0776]

 12%|█▏        | 657/5500 [06:29<48:40,  1.66it/s, Mean Loss=0.0776]

 12%|█▏        | 657/5500 [06:30<48:40,  1.66it/s, Mean Loss=0.13]  

 12%|█▏        | 658/5500 [06:30<48:40,  1.66it/s, Mean Loss=0.13]

 12%|█▏        | 658/5500 [06:30<48:40,  1.66it/s, Mean Loss=0.113]

 12%|█▏        | 659/5500 [06:31<48:29,  1.66it/s, Mean Loss=0.113]

 12%|█▏        | 659/5500 [06:31<48:29,  1.66it/s, Mean Loss=0.114]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 183.58it/s]




 12%|█▏        | 660/5500 [06:31<50:31,  1.60it/s, Mean Loss=0.114]

 12%|█▏        | 660/5500 [06:31<50:31,  1.60it/s, Mean Loss=0.102]

 12%|█▏        | 661/5500 [06:32<51:05,  1.58it/s, Mean Loss=0.102]

 12%|█▏        | 661/5500 [06:32<51:05,  1.58it/s, Mean Loss=0.164]

 12%|█▏        | 662/5500 [06:33<50:19,  1.60it/s, Mean Loss=0.164]

 12%|█▏        | 662/5500 [06:33<50:19,  1.60it/s, Mean Loss=0.179]

 12%|█▏        | 663/5500 [06:33<50:32,  1.60it/s, Mean Loss=0.179]

 12%|█▏        | 663/5500 [06:33<50:32,  1.60it/s, Mean Loss=0.184]

 12%|█▏        | 664/5500 [06:34<51:50,  1.55it/s, Mean Loss=0.184]

 12%|█▏        | 664/5500 [06:34<51:50,  1.55it/s, Mean Loss=0.142]

 12%|█▏        | 665/5500 [06:35<50:05,  1.61it/s, Mean Loss=0.142]

 12%|█▏        | 665/5500 [06:35<50:05,  1.61it/s, Mean Loss=0.174]

 12%|█▏        | 666/5500 [06:35<49:29,  1.63it/s, Mean Loss=0.174]

 12%|█▏        | 666/5500 [06:35<49:29,  1.63it/s, Mean Loss=0.12] 

 12%|█▏        | 667/5500 [06:36<49:57,  1.61it/s, Mean Loss=0.12]

 12%|█▏        | 667/5500 [06:36<49:57,  1.61it/s, Mean Loss=0.113]

 12%|█▏        | 668/5500 [06:36<49:39,  1.62it/s, Mean Loss=0.113]

 12%|█▏        | 668/5500 [06:36<49:39,  1.62it/s, Mean Loss=0.226]

 12%|█▏        | 669/5500 [06:37<49:24,  1.63it/s, Mean Loss=0.226]

 12%|█▏        | 669/5500 [06:37<49:24,  1.63it/s, Mean Loss=0.113]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 200.63it/s]




 12%|█▏        | 670/5500 [06:38<50:10,  1.60it/s, Mean Loss=0.113]

 12%|█▏        | 670/5500 [06:38<50:10,  1.60it/s, Mean Loss=0.0941]

 12%|█▏        | 671/5500 [06:38<50:00,  1.61it/s, Mean Loss=0.0941]

 12%|█▏        | 671/5500 [06:38<50:00,  1.61it/s, Mean Loss=0.0995]

 12%|█▏        | 672/5500 [06:39<49:08,  1.64it/s, Mean Loss=0.0995]

 12%|█▏        | 672/5500 [06:39<49:08,  1.64it/s, Mean Loss=0.184] 

 12%|█▏        | 673/5500 [06:39<47:32,  1.69it/s, Mean Loss=0.184]

 12%|█▏        | 673/5500 [06:39<47:32,  1.69it/s, Mean Loss=0.193]

 12%|█▏        | 674/5500 [06:40<46:27,  1.73it/s, Mean Loss=0.193]

 12%|█▏        | 674/5500 [06:40<46:27,  1.73it/s, Mean Loss=0.116]

 12%|█▏        | 675/5500 [06:40<45:50,  1.75it/s, Mean Loss=0.116]

 12%|█▏        | 675/5500 [06:40<45:50,  1.75it/s, Mean Loss=0.198]

 12%|█▏        | 676/5500 [06:41<45:24,  1.77it/s, Mean Loss=0.198]

 12%|█▏        | 676/5500 [06:41<45:24,  1.77it/s, Mean Loss=0.116]

 12%|█▏        | 677/5500 [06:42<44:49,  1.79it/s, Mean Loss=0.116]

 12%|█▏        | 677/5500 [06:42<44:49,  1.79it/s, Mean Loss=0.0883]

 12%|█▏        | 678/5500 [06:42<44:37,  1.80it/s, Mean Loss=0.0883]

 12%|█▏        | 678/5500 [06:42<44:37,  1.80it/s, Mean Loss=0.0609]

 12%|█▏        | 679/5500 [06:43<44:20,  1.81it/s, Mean Loss=0.0609]

 12%|█▏        | 679/5500 [06:43<44:20,  1.81it/s, Mean Loss=0.0712]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 224.05it/s]




 12%|█▏        | 680/5500 [06:43<46:19,  1.73it/s, Mean Loss=0.0712]

 12%|█▏        | 680/5500 [06:43<46:19,  1.73it/s, Mean Loss=0.0849]

 12%|█▏        | 681/5500 [06:44<45:51,  1.75it/s, Mean Loss=0.0849]

 12%|█▏        | 681/5500 [06:44<45:51,  1.75it/s, Mean Loss=0.219] 

 12%|█▏        | 682/5500 [06:44<45:47,  1.75it/s, Mean Loss=0.219]

 12%|█▏        | 682/5500 [06:44<45:47,  1.75it/s, Mean Loss=0.184]

 12%|█▏        | 683/5500 [06:45<44:34,  1.80it/s, Mean Loss=0.184]

 12%|█▏        | 683/5500 [06:45<44:34,  1.80it/s, Mean Loss=0.0753]

 12%|█▏        | 684/5500 [06:45<45:03,  1.78it/s, Mean Loss=0.0753]

 12%|█▏        | 684/5500 [06:45<45:03,  1.78it/s, Mean Loss=0.115] 

 12%|█▏        | 685/5500 [06:46<44:57,  1.79it/s, Mean Loss=0.115]

 12%|█▏        | 685/5500 [06:46<44:57,  1.79it/s, Mean Loss=0.101]

 12%|█▏        | 686/5500 [06:47<45:53,  1.75it/s, Mean Loss=0.101]

 12%|█▏        | 686/5500 [06:47<45:53,  1.75it/s, Mean Loss=0.126]

 12%|█▏        | 687/5500 [06:47<46:26,  1.73it/s, Mean Loss=0.126]

 12%|█▏        | 687/5500 [06:47<46:26,  1.73it/s, Mean Loss=0.194]

 13%|█▎        | 688/5500 [06:48<49:49,  1.61it/s, Mean Loss=0.194]

 13%|█▎        | 688/5500 [06:48<49:49,  1.61it/s, Mean Loss=0.135]

 13%|█▎        | 689/5500 [06:49<48:36,  1.65it/s, Mean Loss=0.135]

 13%|█▎        | 689/5500 [06:49<48:36,  1.65it/s, Mean Loss=0.195]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 158.25it/s]




 13%|█▎        | 690/5500 [06:49<52:25,  1.53it/s, Mean Loss=0.195]

 13%|█▎        | 690/5500 [06:49<52:25,  1.53it/s, Mean Loss=0.165]

 13%|█▎        | 691/5500 [06:50<52:04,  1.54it/s, Mean Loss=0.165]

 13%|█▎        | 691/5500 [06:50<52:04,  1.54it/s, Mean Loss=0.174]

 13%|█▎        | 692/5500 [06:51<51:27,  1.56it/s, Mean Loss=0.174]

 13%|█▎        | 692/5500 [06:51<51:27,  1.56it/s, Mean Loss=0.127]

 13%|█▎        | 693/5500 [06:51<49:45,  1.61it/s, Mean Loss=0.127]

 13%|█▎        | 693/5500 [06:51<49:45,  1.61it/s, Mean Loss=0.136]

 13%|█▎        | 694/5500 [06:52<48:14,  1.66it/s, Mean Loss=0.136]

 13%|█▎        | 694/5500 [06:52<48:14,  1.66it/s, Mean Loss=0.081]

 13%|█▎        | 695/5500 [06:52<49:14,  1.63it/s, Mean Loss=0.081]

 13%|█▎        | 695/5500 [06:52<49:14,  1.63it/s, Mean Loss=0.192]

 13%|█▎        | 696/5500 [06:53<50:18,  1.59it/s, Mean Loss=0.192]

 13%|█▎        | 696/5500 [06:53<50:18,  1.59it/s, Mean Loss=0.175]

 13%|█▎        | 697/5500 [06:54<50:22,  1.59it/s, Mean Loss=0.175]

 13%|█▎        | 697/5500 [06:54<50:22,  1.59it/s, Mean Loss=0.101]

 13%|█▎        | 698/5500 [06:54<51:42,  1.55it/s, Mean Loss=0.101]

 13%|█▎        | 698/5500 [06:54<51:42,  1.55it/s, Mean Loss=0.212]

 13%|█▎        | 699/5500 [06:55<54:29,  1.47it/s, Mean Loss=0.212]

 13%|█▎        | 699/5500 [06:55<54:29,  1.47it/s, Mean Loss=0.19] 




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 180.81it/s]




 13%|█▎        | 700/5500 [06:56<57:05,  1.40it/s, Mean Loss=0.19]

 13%|█▎        | 700/5500 [06:56<57:05,  1.40it/s, Mean Loss=0.209]

 13%|█▎        | 701/5500 [06:56<54:36,  1.46it/s, Mean Loss=0.209]

 13%|█▎        | 701/5500 [06:56<54:36,  1.46it/s, Mean Loss=0.163]

 13%|█▎        | 702/5500 [06:57<51:42,  1.55it/s, Mean Loss=0.163]

 13%|█▎        | 702/5500 [06:57<51:42,  1.55it/s, Mean Loss=0.107]

 13%|█▎        | 703/5500 [06:58<49:18,  1.62it/s, Mean Loss=0.107]

 13%|█▎        | 703/5500 [06:58<49:18,  1.62it/s, Mean Loss=0.103]

 13%|█▎        | 704/5500 [06:58<48:51,  1.64it/s, Mean Loss=0.103]

 13%|█▎        | 704/5500 [06:58<48:51,  1.64it/s, Mean Loss=0.153]

 13%|█▎        | 705/5500 [06:59<51:21,  1.56it/s, Mean Loss=0.153]

 13%|█▎        | 705/5500 [06:59<51:21,  1.56it/s, Mean Loss=0.149]

 13%|█▎        | 706/5500 [07:00<58:08,  1.37it/s, Mean Loss=0.149]

 13%|█▎        | 706/5500 [07:00<58:08,  1.37it/s, Mean Loss=0.122]

 13%|█▎        | 707/5500 [07:01<59:05,  1.35it/s, Mean Loss=0.122]

 13%|█▎        | 707/5500 [07:01<59:05,  1.35it/s, Mean Loss=0.0939]

 13%|█▎        | 708/5500 [07:01<58:22,  1.37it/s, Mean Loss=0.0939]

 13%|█▎        | 708/5500 [07:01<58:22,  1.37it/s, Mean Loss=0.151] 

 13%|█▎        | 709/5500 [07:02<59:02,  1.35it/s, Mean Loss=0.151]

 13%|█▎        | 709/5500 [07:02<59:02,  1.35it/s, Mean Loss=0.155]




Evaluation:   0%|          | 0/15 [00:00<?, ?it/s]

[A

Evaluation: 100%|██████████| 15/15 [00:00<00:00, 179.17it/s]




 13%|█▎        | 710/5500 [07:04<1:16:00,  1.05it/s, Mean Loss=0.155]

## Analysis

In [None]:
# Assemble the series handed to draw_plots. Key insertion order is kept
# the same as before: raw log columns first, then the derived entries.
plots_data = {
    column: log_data[column]
    for column in ("Epoch", "Time", "Eval Epoch", "Eval Time")
}
# The training loss is passed through moving_average before plotting;
# the test loss is taken from the log unchanged.
plots_data["Mean Loss"] = moving_average(log_data["Mean Loss"])
plots_data["Mean Test Loss"] = log_data["Mean Test Loss"]
plots_data["Minimizing constant"] = minimizing_constant

# Plot specification comes from the logger instance.
plots = LOGGER.log_plots

draw_plots(plots_data, plots, plot_width=8, plot_height=4, row_plots=2)