## Results 

One of possibilities for improvment we worked on was to predict income of each client in the next month using history of transactions as features. To do that we changed output of baseline model to be real numbers and used MSELoss to train model. Result of predictions can be viewed in the last cell

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
! pip install --upgrade numpy pandas tqdm torch catalyst==20.09

Requirement already up-to-date: numpy in /usr/local/lib/python3.6/dist-packages (1.19.2)
Requirement already up-to-date: pandas in /usr/local/lib/python3.6/dist-packages (1.1.2)
Requirement already up-to-date: tqdm in /usr/local/lib/python3.6/dist-packages (4.49.0)
Requirement already up-to-date: torch in /usr/local/lib/python3.6/dist-packages (1.6.0+cu101)
Requirement already up-to-date: catalyst==20.09 in /usr/local/lib/python3.6/dist-packages (20.9)


In [1]:
import os
import json
import pickle
from bisect import bisect_left, bisect_right
from datetime import datetime, timedelta
from collections import defaultdict, Counter

import numpy as np
import pandas as pd
from tqdm import tqdm

# GPU hack if you need
# os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [None]:
os.listdir("./sample_data")

['anscombe.json',
 'README.md',
 'mnist_test.csv',
 'mnist_train_small.csv',
 'california_housing_train.csv',
 'california_housing_test.csv']

# Data

Columns
- `party_rk` – client unique identifier
- `account_rk` – client account unique identifier
- `financial_account_type_cd` – debit/credit card flag
- `transaction_dttm` – operation datetime
- `transaction_type_desc` – purchase/payment/...
- `transaction_amt_rur` – transaction price
- `merchant_type` - DUTY FREE STORES/FUEL DEALERS/RESTAURANTS/ etc
- `merchant_group_rk` - McDonald's/Wildberries/ etc

It's important that table is already sorted by `transaction_dttm` column!!!

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
DATADIR = "/content/drive/My Drive" # "./data"
transactions_path = f"{DATADIR}/avk_hackathon_data_transactions.csv"
to_merge = pd.read_csv(f"{DATADIR}/avk_hackathon_data_transactions.csv")

In [None]:
df.head(100000)

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,party_rk,account_rk,financial_account_type_cd,transaction_dttm,transaction_type_desc,transaction_amt_rur,merchant_rk,merchant_type,merchant_group_rk,category,month,cur_month,balance_chng
0,0,0,20337,19666,1,2019-01-01,Покупка,84.00,88676.0,348.0,,Сувениры,2019-01,2019-01-31,15000.0
1,868408,868408,8510,7191,1,2019-01-01,Покупка,17.90,667802.0,286.0,979.0,Супермаркеты,2019-01,2019-01-31,-10000.0
2,557568,557568,14371,28929,2,2019-01-01,Покупка,3064.00,278395.0,286.0,356.0,Супермаркеты,2019-01,2019-01-31,-5000.0
3,868410,868410,8510,7191,1,2019-01-01,Покупка,1011.69,672519.0,297.0,212.0,Топливо,2019-01,2019-01-31,-10000.0
4,557567,557567,14371,28929,2,2019-01-01,Покупка,224.00,611123.0,330.0,,Фаст Фуд,2019-01,2019-01-31,-5000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,904723,904723,62336,71591,2,2019-01-05,Покупка,130.00,1061002.0,291.0,,Супермаркеты,2019-01,2019-01-31,-10000.0
99996,455158,455158,41272,41841,2,2019-01-05,Покупка,105.00,291942.0,284.0,,Разные товары,2019-01,2019-01-31,0.0
99997,885223,885223,38779,38705,1,2019-01-05,Платеж,5099.00,623671.0,1.0,,,2019-01,2019-01-31,0.0
99998,164920,164920,41167,41755,2,2019-01-05,Покупка,259.18,56980.0,286.0,878.0,Супермаркеты,2019-01,2019-01-31,-15000.0


In [None]:
to_merge['month'] = to_merge['transaction_dttm'].map(lambda x: x[:7])

In [None]:
merged = pd.merge( left = to_merge, right = f, left_on = ['party_rk','month'], right_on = ['party_rk','month'] )

In [None]:
merged = df.sort_values(by = 'transaction_dttm', )

In [None]:
merged.to_csv("/content/drive/My Drive/avk_hackathon_data_account_total_data.csv")

Unnamed: 0,balance_chng
0,15000.0
1,-10000.0
2,-5000.0
3,-10000.0
4,-5000.0
...,...
11587770,5000.0
11587771,15000.0
11587772,15000.0
11587773,15000.0


In [2]:
DATADIR = "./" # "./data"
transactions_path = f"{DATADIR}/avk_hackathon_data_account_total_data.csv"
#df = pd.read_csv(f"{DATADIR}/avk_hackathon_data_account_total_data.csv")

## Mappings
~1 min

In [19]:
# Prepare & save mappings
mappings = defaultdict(dict)
unk_token = "<UNK>"


def create_mapping(values):
    mapping = {unk_token: 0}
    for v in values:
        if not pd.isna(v):
            mapping[str(v)] = len(mapping)

    return mapping


for col in tqdm(
    [
        "transaction_type_desc",
        "merchant_rk",
        "merchant_type",
        "merchant_group_rk",
        "category",
        "financial_account_type_cd",
        "balance_chng"
    ]
):

    col_values = (
        pd.read_csv(transactions_path, usecols=[col])[col]
        .fillna(unk_token)
        .astype(str)
    )
    mappings[col] = create_mapping(col_values.unique())
    del col_values


with open(f"{DATADIR}/mappings.json", "w") as f:
    json.dump(mappings, f)

100%|██████████| 7/7 [01:57<00:00, 16.83s/it]


In [3]:
with open(f"{DATADIR}/mappings.json", 'r') as f:
     mappings = json.load(f)

## Parse transactions by users
~ 40 min

In [None]:
# Prepare & save client data
party2dates = defaultdict(list)  # for each party save a series of the transaction dates 
party2sum = defaultdict(list)  # for each party save a series of the transaction costs 
party2merchant_type = defaultdict(list)  # for each party save a series of the transaction_type 
party2trans_type = defaultdict(list)
                                      # for each party save a series of the transaction merchant_type

usecols = [
    "party_rk",
    "transaction_dttm",
    "transaction_amt_rur",
    "merchant_type",
    "transaction_type_desc",
    
]

for chunk in tqdm(
    pd.read_csv(transactions_path, usecols=usecols, chunksize=100_000)
):

    chunk["merchant_type"] = (
        chunk["merchant_type"].fillna(unk_token).astype(str)
    )
    chunk["transaction_type_desc"] = (
        chunk["transaction_type_desc"].fillna(unk_token).astype(str)
    )
    chunk["transaction_amt_rur"] = chunk["transaction_amt_rur"].fillna(0)

    for i, row in chunk.iterrows():
        party2dates[row.party_rk].append(row.transaction_dttm)
        party2sum[row.party_rk].append(row.transaction_amt_rur)
        party2merchant_type[row.party_rk].append(
            mappings["merchant_type"][row.merchant_type]
        )
        party2trans_type[row.party_rk].append(
            mappings["transaction_type_desc"][row.transaction_type_desc]
        )

    del chunk

pickle.dump(party2dates, open(f"{DATADIR}/party2dates.pkl", "wb"))
pickle.dump(party2sum, open(f"{DATADIR}/party2sum.pkl", "wb"))
pickle.dump(party2merchant_type, open(f"{DATADIR}/party2merchant_type.pkl", "wb"))
pickle.dump(party2trans_type, open(f"{DATADIR}/party2trans_type.pkl", "wb"))

116it [24:04, 12.46s/it]


In [4]:
party2dates = pickle.load(open(f"{DATADIR}/party2dates-2.pkl", "rb"))
party2sum = pickle.load(open(f"{DATADIR}/party2sum-2.pkl", "rb"))
party2merchant_type = pickle.load(open(f"{DATADIR}/party2merchant_type-2.pkl", "rb"))
party2trans_type = pickle.load(open(f"{DATADIR}/party2trans_type-2.pkl", "rb"))

In [None]:
# Prepare & save client data
party2balance = defaultdict(list)  # for each party save a series of the transaction dates 


usecols = [
    "party_rk",
    "balance_chng"

]

for chunk in tqdm(
    pd.read_csv(transactions_path, usecols=usecols, chunksize=100_000)
):
   
    
    chunk["balance_chng"] = chunk["balance_chng"].fillna(0)

    for i, row in chunk.iterrows():
        
        party2balance[row.party_rk].append(row.balance_chng
        )

    del chunk

pickle.dump(party2balance, open(f"{DATADIR}/party2balance.pkl", "wb"))


116it [11:16,  5.83s/it]


In [5]:
party2balance = pickle.load(open(f"{DATADIR}party2balance.pkl", "rb"))

In [None]:
# load client data
# party2dates = pickle.load(open(f"{DATADIR}/party2dates.pkl", 'rb'))
# party2sum = pickle.load(open(f"{DATADIR}/party2sum.pkl", 'rb'))
# party2merchant_type = pickle.load(open(f"{DATADIR}/party2merchant_type.pkl", 'rb'))
# party2trans_type = pickle.load(open(f"{DATADIR}/party2trans_type.pkl", 'rb'))

In [None]:
party2balance

## PyTorch dataset

In [97]:
from sklearn.model_selection import train_test_split

train_party, valid_party = train_test_split(
    pd.read_csv(transactions_path, usecols=['party_rk']).party_rk.unique(), 
    train_size=0.8, random_state=42
)

print(f'Train: {len(train_party)} Val: {len(valid_party)}')

Train: 39557 Val: 9890


In [98]:
predict_period_len = 60  # -- days
train_predict_dates = (
    pd.date_range("2019-03-01", "2019-10-31", freq="MS")
    .strftime("%Y-%m-%d")
    .tolist()
)
valid_predict_dates = (
    pd.date_range("2019-11-01", "2019-12-31", freq="MS")
    .strftime("%Y-%m-%d")
    .tolist()
)
submission_predict_dates = (
    pd.date_range("2020-01-01", "2020-02-28", freq="2MS")
    .strftime("%Y-%m-%d")
    .tolist()
)

In [99]:
def prepare_data(party_list, mode="train"):
    """
    This function define the pipeline of the creation of train and valid samples.
    We consider each client from party_list. For each client take each 
    predict_period_start from predict_dates list. All client transaction before
    this date is our features. Next, we look at the customer's transactions in 
    the next two months. This transactions should be predicted. It will form 
    our labels vector.
    """

    data_sum = []
    data_trans_type = []
    data_merchant_type = []
    data_labels = []
    data_income = []
    for party_rk in tqdm(party_list):
        date_series = party2dates[party_rk]
        sum_series = party2sum[party_rk]
        merch_type_series = party2merchant_type[party_rk]
        trans_type_series = party2trans_type[party_rk]
        balance_series = party2balance[party_rk]

        if mode == "train":
            predict_dates = train_predict_dates
        elif mode == "valid":
            predict_dates = valid_predict_dates
        elif mode == "submission":
            predict_dates = submission_predict_dates
        else:
            raise Exception("Unknown mode")

        for predict_period_start in predict_dates:

            predict_period_end = datetime.strftime(
                datetime.strptime(predict_period_start, "%Y-%m-%d")
                + timedelta(days=predict_period_len),
                "%Y-%m-%d",
            )

            l, r = (
                bisect_left(date_series, predict_period_start),
                bisect_right(date_series, predict_period_end),
            )

            history_merch_type = merch_type_series[:l]
            history_sum = sum_series[:l]
            history_trans_type = trans_type_series[:l]
            history_balance = balance_series[:l]
            predict_merch = merch_type_series[l:r]
            predict_balance = balance_series[l:r]

            if predict_merch and l or mode not in ("train", "valid"):
                data_sum.append(history_sum)
                data_trans_type.append(history_trans_type)
                data_merchant_type.append(history_merch_type)
                data_income.append(history_balance)
                data_labels.append(predict_balance)

    return data_sum, data_trans_type, data_merchant_type, data_income, data_labels

In [100]:
train_sum, train_trans_type, train_merchant_type, train_income, train_labels = prepare_data(
    train_party, mode="train"
)
valid_sum, valid_trans_type, valid_merchant_type, valid_income, valid_labels = prepare_data(
    valid_party, mode="valid"
)





  0%|          | 0/39557 [00:00<?, ?it/s][A[A[A[A



  1%|          | 330/39557 [00:00<00:11, 3298.96it/s][A[A[A[A



  2%|▏         | 663/39557 [00:00<00:11, 3305.96it/s][A[A[A[A



  2%|▏         | 971/39557 [00:00<00:11, 3232.19it/s][A[A[A[A



  3%|▎         | 1319/39557 [00:00<00:11, 3300.11it/s][A[A[A[A



  4%|▍         | 1658/39557 [00:00<00:11, 3325.39it/s][A[A[A[A



  5%|▌         | 2004/39557 [00:00<00:11, 3360.25it/s][A[A[A[A



  6%|▌         | 2332/39557 [00:00<00:11, 3334.64it/s][A[A[A[A



  7%|▋         | 2664/39557 [00:00<00:11, 3291.46it/s][A[A[A[A



  8%|▊         | 3003/39557 [00:00<00:11, 3320.38it/s][A[A[A[A



  8%|▊         | 3352/39557 [00:01<00:10, 3362.04it/s][A[A[A[A



  9%|▉         | 3698/39557 [00:01<00:10, 3390.58it/s][A[A[A[A



 10%|█         | 4031/39557 [00:01<00:10, 3370.02it/s][A[A[A[A



 11%|█         | 4364/39557 [00:01<00:10, 3347.12it/s][A[A[A[A



 12%|█▏        | 4709/39557 [00:

In [69]:
#normalize labels
min = 0
max = 0
for i in train_labels:
  for j in i:
    if j>max:
      max=j
    if j<min:
      min = j

In [70]:
for i in range(len(train_labels)):
  for j in range(len(train_labels[i])):
    train_labels[i][j] = np.log(1+(train_labels[i][j]-min))

In [71]:
for i in range(len(valid_labels )):
  for j in range(len(valid_labels [i])):
    valid_labels [i][j] = np.log(1+(valid_labels [i][j]-min))

In [11]:
for i in range(len(train_labels)):
  for j in range(len(train_labels[i])):
    train_labels[i][j] = (train_labels[i][j]-min)/(max-min)

In [12]:
for i in range(len(valid_labels )):
  for j in range(len(valid_labels [i])):
    valid_labels [i][j] = (valid_labels [i][j]-min)/(max-min)


In [101]:
# Third transformation
r=0
mean = 0
for i in range(len(train_labels)):
  for j in range(len(train_labels[i])):
    r+=1
    mean += train_labels[i][j]
mean = mean/r
disp = 0
for i in range(len(train_labels)):
  for j in range(len(train_labels[i])):
    disp += (train_labels[i][j]-mean)**2
disp = disp/(r-1)
for i in range(len(train_labels)):
  for j in range(len(train_labels[i])):
    train_labels[i][j] = (train_labels[i][j] - mean)/np.sqrt(disp)
    
for i in range(len(valid_labels )):
  for j in range(len(valid_labels [i])):
    valid_labels[i][j] = (valid_labels[i][j] - mean)/np.sqrt(disp)

## PyTorch loaders

In [102]:
import torch
from torch.utils.data import Dataset, DataLoader

In [103]:
MERCH_TYPE_NCLASSES = len(mappings['merchant_type'])
TRANS_TYPE_NCLASSES = len(mappings['transaction_type_desc'])
PADDING_LEN = 300

In [104]:
class RSDataset(Dataset):
    def __init__(self, data_sum, data_trans_type, data_merchant_type, labels):
        super(RSDataset, self).__init__()
        self.data_sum = data_sum
        self.data_trans_type = data_trans_type
        self.data_merchant_type = data_merchant_type
        self.labels = labels

    def __len__(self):
        return len(self.data_sum)

    def __getitem__(self, idx):
        targets = torch.tensor(self.labels[idx][0].reshape(-1), dtype = torch.float32)

        item = {
            "features": {},
            "targets": targets,
        }

        sum_feature = np.array(self.data_sum[idx][-PADDING_LEN:])
        sum_feature = np.vectorize(lambda s: np.log(1 + s))(sum_feature)
        if sum_feature.shape[0] < PADDING_LEN:
            pad = np.zeros(
                (PADDING_LEN - sum_feature.shape[0],), dtype=np.float32
            )
            sum_feature = np.hstack((sum_feature, pad))
        item["features"]["sum"] = torch.from_numpy(sum_feature).float()
        
        for feature_name, feature_values in zip(
            ["trans_type", "merchant_type"],
            [self.data_trans_type[idx], self.data_merchant_type[idx]],
        ):

            feature_values = np.array(feature_values[-PADDING_LEN:])
            mask = np.ones(feature_values.shape[0], dtype=np.float32)
            if feature_values.shape[0] < PADDING_LEN:
                feature_values = np.append(
                    feature_values,
                    np.zeros(
                        PADDING_LEN - feature_values.shape[0], dtype=np.int64
                    ),
                )
                mask = np.append(
                    mask,
                    np.zeros(PADDING_LEN - mask.shape[0], dtype=np.float32),
                )
            item["features"][feature_name] = torch.from_numpy(feature_values).long()
            item["features"][f"{feature_name}_mask"] = torch.from_numpy(mask).float()

        return item

In [105]:
train_dataset = RSDataset(
    train_sum, train_trans_type, train_merchant_type, train_labels
)
valid_dataset = RSDataset(
    valid_sum, valid_trans_type, valid_merchant_type, valid_labels
)

In [106]:
train_loader = DataLoader(
    train_dataset, batch_size=64, shuffle=True, num_workers=2
)
valid_loader = DataLoader(
    valid_dataset, batch_size=64, shuffle=False, num_workers=2
)

## Model

This is the baseline model for predicting purchases in `merchant_type` in the next 2 months

In [107]:
import torch.nn as nn
from collections import OrderedDict

In [108]:
params = {
    'merchant_type_emb_dim': 64,
    'trans_type_embedding': 3,
    'transformer_nhead': 2,
    'transformer_dim_feedforward': 256,
    'transformer_dropout': 0.1,
    'dense_unit': 256,
    'num_layers': 4,
}

In [109]:
MERCH_TYPE_NCLASSES, TRANS_TYPE_NCLASSES

(458, 5)

In [110]:
class Model(nn.Module):
    def __init__(self):
        super().__init__()

        self.merchant_type_embedding = nn.Embedding(
            MERCH_TYPE_NCLASSES, params["merchant_type_emb_dim"]
        )
        self.trans_type_embedding = nn.Embedding(
            TRANS_TYPE_NCLASSES, params["trans_type_embedding"]
        )

        embedding_size = (
            params["merchant_type_emb_dim"]
            + params["trans_type_embedding"]
            + 1
        )

        transformer_blocks = []
        for i in range(params["num_layers"]):
            transformer_block = nn.TransformerEncoderLayer(
                d_model=embedding_size,
                nhead=params["transformer_nhead"],
                dim_feedforward=params["transformer_dim_feedforward"],
                dropout=params["transformer_dropout"],
            )
            transformer_blocks.append(
                (f"transformer_block_{i}", transformer_block)
            )

        self.transformer_encoder = nn.Sequential(
            OrderedDict(transformer_blocks)
        )

        self.linear = nn.Linear(
            in_features=embedding_size, out_features=params["dense_unit"]
        )
        self.scorer = nn.Linear(
            in_features=params["dense_unit"],
            out_features=1,
        )

    def forward(self, features):

        merchant_type_emb = self.merchant_type_embedding(features["merchant_type"])
        trans_type_emb = self.trans_type_embedding(features["trans_type"])

        merchant_type_emb = merchant_type_emb * features["merchant_type_mask"].unsqueeze(-1)
        trans_type_emb = trans_type_emb * features["trans_type_mask"].unsqueeze(-1)

        embeddings = torch.cat(
            (merchant_type_emb, trans_type_emb, features["sum"].unsqueeze(-1)),
            dim=-1,
        )
      
        transformer_output = self.transformer_encoder(embeddings)
        pooling = torch.mean(transformer_output, dim=1)
        linear = torch.tanh(self.linear(pooling))

        value = self.scorer(linear)

        return value

### One-batch-check

In [111]:
model = Model()
criterion = nn.MSELoss()
batch = next(iter(train_loader))
output = model(batch['features'])
print(output.shape, batch['targets'].shape)
loss = criterion(output,batch['targets'])
print(loss)

torch.Size([64, 1]) torch.Size([64, 1])
tensor(0.0694, grad_fn=<MseLossBackward>)


## Train loop with [Catalyst](https://github.com/catalyst-team/catalyst)

[A comprehensive step-by-step guide to basic and advanced features](https://github.com/catalyst-team/catalyst#step-by-step-guide).

---



In [112]:
from catalyst import dl, utils
from catalyst.utils import metrics

## Custom metrics for this hackathon

In [113]:
from typing import List, Optional, Sequence, Tuple, Union

import numpy as np
import torch
from catalyst.utils.metrics.functional import preprocess_multi_label_metrics
from catalyst.utils.torch import get_activation_fn


def multi_label_metrics(
    outputs: torch.Tensor,
    targets: torch.Tensor,
    threshold: Union[float, torch.Tensor],
    activation: Optional[str] = None,
    eps: float = 1e-7,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
    """
    Computes multi-label precision for the specified activation and threshold.

    Args:
        outputs (torch.Tensor): NxK tensor that for each of the N examples
            indicates the probability of the example belonging to each of
            the K classes, according to the model.
        targets (torch.Tensor): binary NxK tensort that encodes which of the K
            classes are associated with the N-th input
            (eg: a row [0, 1, 0, 1] indicates that the example is
            associated with classes 2 and 4)
        threshold (float): threshold for for model output
        activation (str): activation to use for model output
        eps (float): epsilon to avoid zero division
    
    Extended version of 
        https://github.com/catalyst-team/catalyst/blob/master/catalyst/utils/metrics/accuracy.py#L58

    Returns:
        computed multi-label metrics
    """
    outputs, targets, _ = preprocess_multi_label_metrics(
        outputs=outputs, targets=targets
    )
    activation_fn = get_activation_fn(activation)
    outputs = activation_fn(outputs)

    outputs = (outputs > threshold).long()

    accuracy = (targets.long() == outputs.long()).sum().float() / np.prod(
        targets.shape
    )

    intersection = (outputs.long() * targets.long()).sum(axis=1).float()
    num_predicted = outputs.long().sum(axis=1).float()
    num_relevant = targets.long().sum(axis=1).float()
    union = num_predicted + num_relevant

    # Precision = ({predicted items} && {relevant items}) / {predicted items}
    precision = intersection / (num_predicted + eps * (num_predicted == 0))
    # Recall = ({predicted items} && {relevant items}) / {relevant items}
    recall = intersection / (num_relevant + eps * (num_relevant == 0))
    # IoU = ({predicted items} && {relevant items}) / ({predicted items} || {relevant items})
    iou = (intersection + eps * (union == 0)) / (union - intersection + eps)

    return accuracy, precision.mean(), recall.mean(), iou.mean()


def precision_at_k(
    actual: torch.Tensor, 
    predicted: torch.Tensor, 
    k: int,
):
    """
    Computes precision at cutoff k for one sample

    Args:
       actual: (torch.Tensor): tensor of length K with predicted item_ids sorted by relevance
       predicted (torch.Tensor): binary tensor that encodes which of the K
           classes are associated with the N-th input
           (eg: a row [0, 1, 0, 1] indicates that the example is
           associated with classes 2 and 4)
       k (int): parameter k of precison@k

    Returns:
       Computed value of precision@k for given sample
    """
    p_at_k = 0.0
    for item in predicted[:k]:
        if actual[item]:
            p_at_k += 1
    p_at_k /= k

    return p_at_k


def average_precision_at_k(
    actual: torch.Tensor, 
    predicted: torch.Tensor, 
    k: int,
) -> float:
    """
    Computes average precision at cutoff k for one sample

    Args:
      actual: (torch.Tensor): tensor of length K with predicted item_ids sorted by relevance
      predicted (torch.Tensor): binary tensor that encodes which of the K
          classes are associated with the N-th input
          (eg: a row [0, 1, 0, 1] indicates that the example is
          associated with classes 2 and 4)
      k (int): parameter k of AP@k

    Returns:
        Computed value of AP@k for given sample
    """
    ap_at_k = 0.0
    for idx, item in enumerate(predicted[:k]):
        if actual[item]:
            ap_at_k += precision_at_k(actual, predicted, k=idx + 1)
    ap_at_k /= min(k, actual.sum().cpu().numpy())
    

    return ap_at_k


def mean_average_precision_at_k(
    output: torch.Tensor, target: torch.Tensor, top_k: Tuple[int, ...] = (1,)
) -> List[float]:
    """
    Computes mean_average_precision_at_k at set of cutoff parameters K

    Args:
       outputs (torch.Tensor): NxK tensor that for each of the N examples
           indicates the probability of the example belonging to each of
           the K classes, according to the model.
       targets (torch.Tensor): binary NxK tensort that encodes which of the K
           classes are associated with the N-th input
           (eg: a row [0, 1, 0, 1] indicates that the example is
           associated with classes 2 and 4)
       top_k (tuple): list of parameters k at which map@k will be computed


    Returns:
       List of computed values of map@k at each cutoff k from topk
    """
    max_k = max(top_k)
    batch_size = target.size(0)

    _, top_indices = output.topk(k=max_k, dim=1, largest=True, sorted=True)

    result = []
    for k in top_k:  # loop over k
        map_at_k = 0.0
        for actual_target, predicted_items in zip(
            target, top_indices
        ):  # loop over samples
            map_at_k += average_precision_at_k(
                actual_target, predicted_items, k
            )
        map_at_k = map_at_k / batch_size
        result.append(map_at_k)

    return result

In [114]:
# What is Runner?
# https://catalyst-team.github.io/catalyst/api/core.html#runner
class CustomRunner(dl.Runner):

    def _handle_batch(self, batch):
        # model train/valid step
        features, targets = batch["features"], batch["targets"]
        logits = self.model(features)
        scores = torch.sigmoid(logits)

        loss = self.criterion(logits, targets)#(torch.log(1+logits), torch.log(1+targets))

        batch_metrics = {
            "loss": loss
        }
        
        self.input = {"features": features, "targets": targets}
        self.output = {"logits": logits, "scores": scores}
        self.batch_metrics.update(batch_metrics)

        if self.is_train_loader:
            loss.backward()
            self.optimizer.step()
            self.optimizer.zero_grad()
    
    def predict_batch(self, batch):
        # model inference step
        batch = utils.maybe_recursive_call(batch, "to", device=self.device)
        logits = self.model(batch["features"])
        scores = torch.sigmoid(logits)
        return scores

In [115]:
model = Model()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

loaders = {"train": train_loader, "valid": valid_loader}

In [116]:
%load_ext tensorboard
%tensorboard --logdir ./logs

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6007 (pid 3651820), started 2:03:02 ago. (Use '!kill 3651820' to kill it.)

In [117]:
# For other minimal examples, please follow the link below
# https://github.com/catalyst-team/catalyst#minimal-examples
runner = CustomRunner()
# model training
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=None,
    loaders=loaders,
    logdir="./logs",
    num_epochs=3,
    verbose=True,
    
    load_best_on_end=True,
    overfit=False,  #  <<<--- DO NOT FORGET TO MAKE IT ``False`` 
                    #  (``True`` uses only one batch to check pipeline correctness)
    callbacks=[
        # https://scikit-learn.org/stable/modules/generated/sklearn.metrics.average_precision_score.html
        # dl.AveragePrecisionCallback(input_key="targets", output_key="scores", prefix="ap"),
        # https://scikit-learn.org/stable/modules/generated/sklearn.metrics.roc_auc_score.html
        # dl.AUCCallback(input_key="targets", output_key="scores", prefix="auc"),
    ],
    main_metric="loss", # "ap/mean", 
    minimize_metric=True,
)





1/3 * Epoch (train):   0% 0/3657 [00:00<?, ?it/s][A[A[A[A



1/3 * Epoch (train):   0% 0/3657 [00:01<?, ?it/s, loss=0.165][A[A[A[A



1/3 * Epoch (train):   0% 1/3657 [00:01<1:07:26,  1.11s/it, loss=0.165][A[A[A[A



1/3 * Epoch (train):   0% 1/3657 [00:01<1:07:26,  1.11s/it, loss=1.074][A[A[A[A



1/3 * Epoch (train):   0% 2/3657 [00:01<53:06,  1.15it/s, loss=1.074]  [A[A[A[A



1/3 * Epoch (train):   0% 2/3657 [00:01<53:06,  1.15it/s, loss=0.238][A[A[A[A



1/3 * Epoch (train):   0% 3/3657 [00:01<42:58,  1.42it/s, loss=0.238][A[A[A[A



1/3 * Epoch (train):   0% 3/3657 [00:02<42:58,  1.42it/s, loss=0.284][A[A[A[A



1/3 * Epoch (train):   0% 4/3657 [00:02<35:43,  1.70it/s, loss=0.284][A[A[A[A



1/3 * Epoch (train):   0% 4/3657 [00:02<35:43,  1.70it/s, loss=0.243][A[A[A[A



1/3 * Epoch (train):   0% 5/3657 [00:02<30:42,  1.98it/s, loss=0.243][A[A[A[A



1/3 * Epoch (train):   0% 5/3657 [00:02<30:42,  1.98it/s, loss=0.233][A[A[A[A



1/3 * Epoch (train):   1% 47/3657 [00:15<18:17,  3.29it/s, loss=0.047][A[A[A[A



1/3 * Epoch (train):   1% 48/3657 [00:15<18:06,  3.32it/s, loss=0.047][A[A[A[A



1/3 * Epoch (train):   1% 48/3657 [00:16<18:06,  3.32it/s, loss=0.105][A[A[A[A



1/3 * Epoch (train):   1% 49/3657 [00:16<18:05,  3.32it/s, loss=0.105][A[A[A[A



1/3 * Epoch (train):   1% 49/3657 [00:16<18:05,  3.32it/s, loss=0.464][A[A[A[A



1/3 * Epoch (train):   1% 50/3657 [00:16<18:09,  3.31it/s, loss=0.464][A[A[A[A



1/3 * Epoch (train):   1% 50/3657 [00:16<18:09,  3.31it/s, loss=0.263][A[A[A[A



1/3 * Epoch (train):   1% 51/3657 [00:16<18:01,  3.33it/s, loss=0.263][A[A[A[A



1/3 * Epoch (train):   1% 51/3657 [00:16<18:01,  3.33it/s, loss=0.089][A[A[A[A



1/3 * Epoch (train):   1% 52/3657 [00:16<17:48,  3.37it/s, loss=0.089][A[A[A[A



1/3 * Epoch (train):   1% 52/3657 [00:17<17:48,  3.37it/s, loss=0.273][A[A[A[A



1/3 * Epoch (train):   1% 53/3657 [00:17<17:26,  3.44i

1/3 * Epoch (train):   3% 95/3657 [00:29<17:37,  3.37it/s, loss=0.076][A[A[A[A



1/3 * Epoch (train):   3% 95/3657 [00:29<17:37,  3.37it/s, loss=0.058][A[A[A[A



1/3 * Epoch (train):   3% 96/3657 [00:29<17:39,  3.36it/s, loss=0.058][A[A[A[A



1/3 * Epoch (train):   3% 96/3657 [00:30<17:39,  3.36it/s, loss=0.075][A[A[A[A



1/3 * Epoch (train):   3% 97/3657 [00:30<17:22,  3.41it/s, loss=0.075][A[A[A[A



1/3 * Epoch (train):   3% 97/3657 [00:30<17:22,  3.41it/s, loss=0.078][A[A[A[A



1/3 * Epoch (train):   3% 98/3657 [00:30<17:35,  3.37it/s, loss=0.078][A[A[A[A



1/3 * Epoch (train):   3% 98/3657 [00:30<17:35,  3.37it/s, loss=0.127][A[A[A[A



1/3 * Epoch (train):   3% 99/3657 [00:30<17:53,  3.31it/s, loss=0.127][A[A[A[A



1/3 * Epoch (train):   3% 99/3657 [00:31<17:53,  3.31it/s, loss=0.053][A[A[A[A



1/3 * Epoch (train):   3% 100/3657 [00:31<18:19,  3.23it/s, loss=0.053][A[A[A[A



1/3 * Epoch (train):   3% 100/3657 [00:31<18:19,  3.2

1/3 * Epoch (train):   4% 142/3657 [00:42<16:25,  3.57it/s, loss=0.291][A[A[A[A



1/3 * Epoch (train):   4% 142/3657 [00:43<16:25,  3.57it/s, loss=0.116][A[A[A[A



1/3 * Epoch (train):   4% 143/3657 [00:43<16:15,  3.60it/s, loss=0.116][A[A[A[A



1/3 * Epoch (train):   4% 143/3657 [00:43<16:15,  3.60it/s, loss=0.069][A[A[A[A



1/3 * Epoch (train):   4% 144/3657 [00:43<16:02,  3.65it/s, loss=0.069][A[A[A[A



1/3 * Epoch (train):   4% 144/3657 [00:43<16:02,  3.65it/s, loss=0.252][A[A[A[A



1/3 * Epoch (train):   4% 145/3657 [00:43<15:57,  3.67it/s, loss=0.252][A[A[A[A



1/3 * Epoch (train):   4% 145/3657 [00:44<15:57,  3.67it/s, loss=0.732][A[A[A[A



1/3 * Epoch (train):   4% 146/3657 [00:44<15:51,  3.69it/s, loss=0.732][A[A[A[A



1/3 * Epoch (train):   4% 146/3657 [00:44<15:51,  3.69it/s, loss=0.101][A[A[A[A



1/3 * Epoch (train):   4% 147/3657 [00:44<15:52,  3.69it/s, loss=0.101][A[A[A[A



1/3 * Epoch (train):   4% 147/3657 [00:44<1

1/3 * Epoch (train):   5% 189/3657 [00:56<16:40,  3.47it/s, loss=0.031][A[A[A[A



1/3 * Epoch (train):   5% 189/3657 [00:56<16:40,  3.47it/s, loss=0.180][A[A[A[A



1/3 * Epoch (train):   5% 190/3657 [00:56<16:12,  3.57it/s, loss=0.180][A[A[A[A



1/3 * Epoch (train):   5% 190/3657 [00:56<16:12,  3.57it/s, loss=0.808][A[A[A[A



1/3 * Epoch (train):   5% 191/3657 [00:56<16:01,  3.60it/s, loss=0.808][A[A[A[A



1/3 * Epoch (train):   5% 191/3657 [00:57<16:01,  3.60it/s, loss=0.052][A[A[A[A



1/3 * Epoch (train):   5% 192/3657 [00:57<15:55,  3.63it/s, loss=0.052][A[A[A[A



1/3 * Epoch (train):   5% 192/3657 [00:57<15:55,  3.63it/s, loss=0.189][A[A[A[A



1/3 * Epoch (train):   5% 193/3657 [00:57<15:45,  3.67it/s, loss=0.189][A[A[A[A



1/3 * Epoch (train):   5% 193/3657 [00:57<15:45,  3.67it/s, loss=0.070][A[A[A[A



1/3 * Epoch (train):   5% 194/3657 [00:57<15:38,  3.69it/s, loss=0.070][A[A[A[A



1/3 * Epoch (train):   5% 194/3657 [00:57<1

1/3 * Epoch (train):   6% 236/3657 [01:09<16:00,  3.56it/s, loss=0.096][A[A[A[A



1/3 * Epoch (train):   6% 236/3657 [01:09<16:00,  3.56it/s, loss=0.017][A[A[A[A



1/3 * Epoch (train):   6% 237/3657 [01:09<16:14,  3.51it/s, loss=0.017][A[A[A[A



1/3 * Epoch (train):   6% 237/3657 [01:10<16:14,  3.51it/s, loss=0.168][A[A[A[A



1/3 * Epoch (train):   7% 238/3657 [01:10<16:19,  3.49it/s, loss=0.168][A[A[A[A



1/3 * Epoch (train):   7% 238/3657 [01:10<16:19,  3.49it/s, loss=0.116][A[A[A[A



1/3 * Epoch (train):   7% 239/3657 [01:10<16:45,  3.40it/s, loss=0.116][A[A[A[A



1/3 * Epoch (train):   7% 239/3657 [01:10<16:45,  3.40it/s, loss=0.158][A[A[A[A



1/3 * Epoch (train):   7% 240/3657 [01:10<16:38,  3.42it/s, loss=0.158][A[A[A[A



1/3 * Epoch (train):   7% 240/3657 [01:11<16:38,  3.42it/s, loss=0.157][A[A[A[A



1/3 * Epoch (train):   7% 241/3657 [01:11<17:05,  3.33it/s, loss=0.157][A[A[A[A



1/3 * Epoch (train):   7% 241/3657 [01:11<1

1/3 * Epoch (train):   8% 283/3657 [01:23<17:22,  3.24it/s, loss=0.174][A[A[A[A



1/3 * Epoch (train):   8% 283/3657 [01:23<17:22,  3.24it/s, loss=0.234][A[A[A[A



1/3 * Epoch (train):   8% 284/3657 [01:23<17:07,  3.28it/s, loss=0.234][A[A[A[A



1/3 * Epoch (train):   8% 284/3657 [01:24<17:07,  3.28it/s, loss=0.244][A[A[A[A



1/3 * Epoch (train):   8% 285/3657 [01:24<16:59,  3.31it/s, loss=0.244][A[A[A[A



1/3 * Epoch (train):   8% 285/3657 [01:24<16:59,  3.31it/s, loss=0.151][A[A[A[A



1/3 * Epoch (train):   8% 286/3657 [01:24<16:58,  3.31it/s, loss=0.151][A[A[A[A



1/3 * Epoch (train):   8% 286/3657 [01:24<16:58,  3.31it/s, loss=2.714][A[A[A[A



1/3 * Epoch (train):   8% 287/3657 [01:24<16:58,  3.31it/s, loss=2.714][A[A[A[A



1/3 * Epoch (train):   8% 287/3657 [01:25<16:58,  3.31it/s, loss=0.087][A[A[A[A



1/3 * Epoch (train):   8% 288/3657 [01:25<16:59,  3.31it/s, loss=0.087][A[A[A[A



1/3 * Epoch (train):   8% 288/3657 [01:25<1

1/3 * Epoch (train):   9% 330/3657 [01:37<16:18,  3.40it/s, loss=0.304][A[A[A[A



1/3 * Epoch (train):   9% 330/3657 [01:37<16:18,  3.40it/s, loss=0.120][A[A[A[A



1/3 * Epoch (train):   9% 331/3657 [01:37<16:35,  3.34it/s, loss=0.120][A[A[A[A



1/3 * Epoch (train):   9% 331/3657 [01:38<16:35,  3.34it/s, loss=0.088][A[A[A[A



1/3 * Epoch (train):   9% 332/3657 [01:38<16:17,  3.40it/s, loss=0.088][A[A[A[A



1/3 * Epoch (train):   9% 332/3657 [01:38<16:17,  3.40it/s, loss=0.116][A[A[A[A



1/3 * Epoch (train):   9% 333/3657 [01:38<16:03,  3.45it/s, loss=0.116][A[A[A[A



1/3 * Epoch (train):   9% 333/3657 [01:38<16:03,  3.45it/s, loss=0.200][A[A[A[A



1/3 * Epoch (train):   9% 334/3657 [01:38<15:56,  3.47it/s, loss=0.200][A[A[A[A



1/3 * Epoch (train):   9% 334/3657 [01:38<15:56,  3.47it/s, loss=0.845][A[A[A[A



1/3 * Epoch (train):   9% 335/3657 [01:38<15:38,  3.54it/s, loss=0.845][A[A[A[A



1/3 * Epoch (train):   9% 335/3657 [01:39<1

1/3 * Epoch (train):  10% 377/3657 [01:50<14:15,  3.84it/s, loss=0.367][A[A[A[A



1/3 * Epoch (train):  10% 377/3657 [01:50<14:15,  3.84it/s, loss=0.144][A[A[A[A



1/3 * Epoch (train):  10% 378/3657 [01:50<14:11,  3.85it/s, loss=0.144][A[A[A[A



1/3 * Epoch (train):  10% 378/3657 [01:50<14:11,  3.85it/s, loss=0.061][A[A[A[A



1/3 * Epoch (train):  10% 379/3657 [01:50<14:14,  3.84it/s, loss=0.061][A[A[A[A



1/3 * Epoch (train):  10% 379/3657 [01:50<14:14,  3.84it/s, loss=0.106][A[A[A[A



1/3 * Epoch (train):  10% 380/3657 [01:50<14:10,  3.85it/s, loss=0.106][A[A[A[A



1/3 * Epoch (train):  10% 380/3657 [01:51<14:10,  3.85it/s, loss=0.316][A[A[A[A



1/3 * Epoch (train):  10% 381/3657 [01:51<14:06,  3.87it/s, loss=0.316][A[A[A[A



1/3 * Epoch (train):  10% 381/3657 [01:51<14:06,  3.87it/s, loss=0.158][A[A[A[A



1/3 * Epoch (train):  10% 382/3657 [01:51<14:07,  3.86it/s, loss=0.158][A[A[A[A



1/3 * Epoch (train):  10% 382/3657 [01:51<1

1/3 * Epoch (train):  12% 424/3657 [02:02<15:02,  3.58it/s, loss=0.250][A[A[A[A



1/3 * Epoch (train):  12% 424/3657 [02:03<15:02,  3.58it/s, loss=0.232][A[A[A[A



1/3 * Epoch (train):  12% 425/3657 [02:03<14:45,  3.65it/s, loss=0.232][A[A[A[A



1/3 * Epoch (train):  12% 425/3657 [02:03<14:45,  3.65it/s, loss=0.755][A[A[A[A



1/3 * Epoch (train):  12% 426/3657 [02:03<14:36,  3.69it/s, loss=0.755][A[A[A[A



1/3 * Epoch (train):  12% 426/3657 [02:03<14:36,  3.69it/s, loss=0.033][A[A[A[A



1/3 * Epoch (train):  12% 427/3657 [02:03<14:31,  3.71it/s, loss=0.033][A[A[A[A



1/3 * Epoch (train):  12% 427/3657 [02:04<14:31,  3.71it/s, loss=0.130][A[A[A[A



1/3 * Epoch (train):  12% 428/3657 [02:04<14:35,  3.69it/s, loss=0.130][A[A[A[A



1/3 * Epoch (train):  12% 428/3657 [02:04<14:35,  3.69it/s, loss=0.137][A[A[A[A



1/3 * Epoch (train):  12% 429/3657 [02:04<14:51,  3.62it/s, loss=0.137][A[A[A[A



1/3 * Epoch (train):  12% 429/3657 [02:04<1

1/3 * Epoch (train):  13% 471/3657 [02:16<15:41,  3.38it/s, loss=0.179][A[A[A[A



1/3 * Epoch (train):  13% 471/3657 [02:16<15:41,  3.38it/s, loss=0.042][A[A[A[A



1/3 * Epoch (train):  13% 472/3657 [02:16<16:00,  3.32it/s, loss=0.042][A[A[A[A



1/3 * Epoch (train):  13% 472/3657 [02:17<16:00,  3.32it/s, loss=0.216][A[A[A[A



1/3 * Epoch (train):  13% 473/3657 [02:17<16:33,  3.20it/s, loss=0.216][A[A[A[A



1/3 * Epoch (train):  13% 473/3657 [02:17<16:33,  3.20it/s, loss=0.037][A[A[A[A



1/3 * Epoch (train):  13% 474/3657 [02:17<16:48,  3.16it/s, loss=0.037][A[A[A[A



1/3 * Epoch (train):  13% 474/3657 [02:17<16:48,  3.16it/s, loss=0.098][A[A[A[A



1/3 * Epoch (train):  13% 475/3657 [02:17<16:09,  3.28it/s, loss=0.098][A[A[A[A



1/3 * Epoch (train):  13% 475/3657 [02:17<16:09,  3.28it/s, loss=0.059][A[A[A[A



1/3 * Epoch (train):  13% 476/3657 [02:17<15:47,  3.36it/s, loss=0.059][A[A[A[A



1/3 * Epoch (train):  13% 476/3657 [02:18<1

1/3 * Epoch (train):  14% 518/3657 [02:29<15:07,  3.46it/s, loss=0.322][A[A[A[A



1/3 * Epoch (train):  14% 518/3657 [02:30<15:07,  3.46it/s, loss=0.159][A[A[A[A



1/3 * Epoch (train):  14% 519/3657 [02:30<15:16,  3.43it/s, loss=0.159][A[A[A[A



1/3 * Epoch (train):  14% 519/3657 [02:30<15:16,  3.43it/s, loss=0.078][A[A[A[A



1/3 * Epoch (train):  14% 520/3657 [02:30<15:18,  3.41it/s, loss=0.078][A[A[A[A



1/3 * Epoch (train):  14% 520/3657 [02:30<15:18,  3.41it/s, loss=0.029][A[A[A[A



1/3 * Epoch (train):  14% 521/3657 [02:30<15:23,  3.40it/s, loss=0.029][A[A[A[A



1/3 * Epoch (train):  14% 521/3657 [02:31<15:23,  3.40it/s, loss=0.173][A[A[A[A



1/3 * Epoch (train):  14% 522/3657 [02:31<15:07,  3.45it/s, loss=0.173][A[A[A[A



1/3 * Epoch (train):  14% 522/3657 [02:31<15:07,  3.45it/s, loss=0.148][A[A[A[A



1/3 * Epoch (train):  14% 523/3657 [02:31<15:07,  3.46it/s, loss=0.148][A[A[A[A



1/3 * Epoch (train):  14% 523/3657 [02:31<1

1/3 * Epoch (train):  15% 565/3657 [02:43<15:07,  3.41it/s, loss=0.260][A[A[A[A



1/3 * Epoch (train):  15% 565/3657 [02:43<15:07,  3.41it/s, loss=0.084][A[A[A[A



1/3 * Epoch (train):  15% 566/3657 [02:43<15:02,  3.42it/s, loss=0.084][A[A[A[A



1/3 * Epoch (train):  15% 566/3657 [02:44<15:02,  3.42it/s, loss=2.057][A[A[A[A



1/3 * Epoch (train):  16% 567/3657 [02:44<15:19,  3.36it/s, loss=2.057][A[A[A[A



1/3 * Epoch (train):  16% 567/3657 [02:44<15:19,  3.36it/s, loss=0.261][A[A[A[A



1/3 * Epoch (train):  16% 568/3657 [02:44<15:07,  3.40it/s, loss=0.261][A[A[A[A



1/3 * Epoch (train):  16% 568/3657 [02:44<15:07,  3.40it/s, loss=6.120][A[A[A[A



1/3 * Epoch (train):  16% 569/3657 [02:44<15:01,  3.43it/s, loss=6.120][A[A[A[A



1/3 * Epoch (train):  16% 569/3657 [02:44<15:01,  3.43it/s, loss=0.115][A[A[A[A



1/3 * Epoch (train):  16% 570/3657 [02:44<14:53,  3.45it/s, loss=0.115][A[A[A[A



1/3 * Epoch (train):  16% 570/3657 [02:45<1

1/3 * Epoch (train):  17% 612/3657 [02:56<14:21,  3.54it/s, loss=2.154][A[A[A[A



1/3 * Epoch (train):  17% 612/3657 [02:57<14:21,  3.54it/s, loss=0.103][A[A[A[A



1/3 * Epoch (train):  17% 613/3657 [02:57<14:31,  3.49it/s, loss=0.103][A[A[A[A



1/3 * Epoch (train):  17% 613/3657 [02:57<14:31,  3.49it/s, loss=0.094][A[A[A[A



1/3 * Epoch (train):  17% 614/3657 [02:57<14:45,  3.44it/s, loss=0.094][A[A[A[A



1/3 * Epoch (train):  17% 614/3657 [02:57<14:45,  3.44it/s, loss=0.084][A[A[A[A



1/3 * Epoch (train):  17% 615/3657 [02:57<14:39,  3.46it/s, loss=0.084][A[A[A[A



1/3 * Epoch (train):  17% 615/3657 [02:58<14:39,  3.46it/s, loss=0.129][A[A[A[A



1/3 * Epoch (train):  17% 616/3657 [02:58<14:29,  3.50it/s, loss=0.129][A[A[A[A



1/3 * Epoch (train):  17% 616/3657 [02:58<14:29,  3.50it/s, loss=0.044][A[A[A[A



1/3 * Epoch (train):  17% 617/3657 [02:58<14:22,  3.52it/s, loss=0.044][A[A[A[A



1/3 * Epoch (train):  17% 617/3657 [02:58<1

1/3 * Epoch (train):  18% 659/3657 [03:10<14:30,  3.44it/s, loss=0.226][A[A[A[A



1/3 * Epoch (train):  18% 659/3657 [03:10<14:30,  3.44it/s, loss=0.060][A[A[A[A



1/3 * Epoch (train):  18% 660/3657 [03:10<14:39,  3.41it/s, loss=0.060][A[A[A[A



1/3 * Epoch (train):  18% 660/3657 [03:11<14:39,  3.41it/s, loss=0.480][A[A[A[A



1/3 * Epoch (train):  18% 661/3657 [03:11<14:28,  3.45it/s, loss=0.480][A[A[A[A



1/3 * Epoch (train):  18% 661/3657 [03:11<14:28,  3.45it/s, loss=3.828][A[A[A[A



1/3 * Epoch (train):  18% 662/3657 [03:11<14:35,  3.42it/s, loss=3.828][A[A[A[A



1/3 * Epoch (train):  18% 662/3657 [03:11<14:35,  3.42it/s, loss=0.076][A[A[A[A



1/3 * Epoch (train):  18% 663/3657 [03:11<14:43,  3.39it/s, loss=0.076][A[A[A[A



1/3 * Epoch (train):  18% 663/3657 [03:11<14:43,  3.39it/s, loss=0.038][A[A[A[A



1/3 * Epoch (train):  18% 664/3657 [03:11<14:38,  3.41it/s, loss=0.038][A[A[A[A



1/3 * Epoch (train):  18% 664/3657 [03:12<1

1/3 * Epoch (train):  19% 706/3657 [03:24<14:39,  3.36it/s, loss=0.236][A[A[A[A



1/3 * Epoch (train):  19% 706/3657 [03:24<14:39,  3.36it/s, loss=0.097][A[A[A[A



1/3 * Epoch (train):  19% 707/3657 [03:24<14:32,  3.38it/s, loss=0.097][A[A[A[A



1/3 * Epoch (train):  19% 707/3657 [03:24<14:32,  3.38it/s, loss=0.100][A[A[A[A



1/3 * Epoch (train):  19% 708/3657 [03:24<14:19,  3.43it/s, loss=0.100][A[A[A[A



1/3 * Epoch (train):  19% 708/3657 [03:24<14:19,  3.43it/s, loss=0.134][A[A[A[A



1/3 * Epoch (train):  19% 709/3657 [03:24<14:01,  3.50it/s, loss=0.134][A[A[A[A



1/3 * Epoch (train):  19% 709/3657 [03:25<14:01,  3.50it/s, loss=0.309][A[A[A[A



1/3 * Epoch (train):  19% 710/3657 [03:25<13:43,  3.58it/s, loss=0.309][A[A[A[A



1/3 * Epoch (train):  19% 710/3657 [03:25<13:43,  3.58it/s, loss=0.108][A[A[A[A



1/3 * Epoch (train):  19% 711/3657 [03:25<13:34,  3.62it/s, loss=0.108][A[A[A[A



1/3 * Epoch (train):  19% 711/3657 [03:25<1

1/3 * Epoch (train):  21% 753/3657 [03:37<13:36,  3.56it/s, loss=0.017][A[A[A[A



1/3 * Epoch (train):  21% 753/3657 [03:37<13:36,  3.56it/s, loss=0.104][A[A[A[A



1/3 * Epoch (train):  21% 754/3657 [03:37<13:15,  3.65it/s, loss=0.104][A[A[A[A



1/3 * Epoch (train):  21% 754/3657 [03:37<13:15,  3.65it/s, loss=0.293][A[A[A[A



1/3 * Epoch (train):  21% 755/3657 [03:37<12:58,  3.73it/s, loss=0.293][A[A[A[A



1/3 * Epoch (train):  21% 755/3657 [03:38<12:58,  3.73it/s, loss=0.264][A[A[A[A



1/3 * Epoch (train):  21% 756/3657 [03:38<12:52,  3.76it/s, loss=0.264][A[A[A[A



1/3 * Epoch (train):  21% 756/3657 [03:38<12:52,  3.76it/s, loss=0.065][A[A[A[A



1/3 * Epoch (train):  21% 757/3657 [03:38<12:43,  3.80it/s, loss=0.065][A[A[A[A



1/3 * Epoch (train):  21% 757/3657 [03:38<12:43,  3.80it/s, loss=0.049][A[A[A[A



1/3 * Epoch (train):  21% 758/3657 [03:38<12:40,  3.81it/s, loss=0.049][A[A[A[A



1/3 * Epoch (train):  21% 758/3657 [03:38<1

1/3 * Epoch (train):  22% 800/3657 [03:49<12:46,  3.73it/s, loss=0.517][A[A[A[A



1/3 * Epoch (train):  22% 800/3657 [03:50<12:46,  3.73it/s, loss=0.214][A[A[A[A



1/3 * Epoch (train):  22% 801/3657 [03:50<12:45,  3.73it/s, loss=0.214][A[A[A[A



1/3 * Epoch (train):  22% 801/3657 [03:50<12:45,  3.73it/s, loss=0.069][A[A[A[A



1/3 * Epoch (train):  22% 802/3657 [03:50<12:33,  3.79it/s, loss=0.069][A[A[A[A



1/3 * Epoch (train):  22% 802/3657 [03:50<12:33,  3.79it/s, loss=0.044][A[A[A[A



1/3 * Epoch (train):  22% 803/3657 [03:50<12:31,  3.80it/s, loss=0.044][A[A[A[A



1/3 * Epoch (train):  22% 803/3657 [03:50<12:31,  3.80it/s, loss=0.622][A[A[A[A



1/3 * Epoch (train):  22% 804/3657 [03:50<12:47,  3.72it/s, loss=0.622][A[A[A[A



1/3 * Epoch (train):  22% 804/3657 [03:51<12:47,  3.72it/s, loss=0.192][A[A[A[A



1/3 * Epoch (train):  22% 805/3657 [03:51<12:39,  3.75it/s, loss=0.192][A[A[A[A



1/3 * Epoch (train):  22% 805/3657 [03:51<1

1/3 * Epoch (train):  23% 847/3657 [04:03<13:20,  3.51it/s, loss=0.125][A[A[A[A



1/3 * Epoch (train):  23% 847/3657 [04:03<13:20,  3.51it/s, loss=0.055][A[A[A[A



1/3 * Epoch (train):  23% 848/3657 [04:03<13:10,  3.56it/s, loss=0.055][A[A[A[A



1/3 * Epoch (train):  23% 848/3657 [04:03<13:10,  3.56it/s, loss=0.150][A[A[A[A



1/3 * Epoch (train):  23% 849/3657 [04:03<13:02,  3.59it/s, loss=0.150][A[A[A[A



1/3 * Epoch (train):  23% 849/3657 [04:03<13:02,  3.59it/s, loss=0.056][A[A[A[A



1/3 * Epoch (train):  23% 850/3657 [04:03<13:27,  3.48it/s, loss=0.056][A[A[A[A



1/3 * Epoch (train):  23% 850/3657 [04:04<13:27,  3.48it/s, loss=0.086][A[A[A[A



1/3 * Epoch (train):  23% 851/3657 [04:04<13:26,  3.48it/s, loss=0.086][A[A[A[A



1/3 * Epoch (train):  23% 851/3657 [04:04<13:26,  3.48it/s, loss=0.147][A[A[A[A



1/3 * Epoch (train):  23% 852/3657 [04:04<13:17,  3.52it/s, loss=0.147][A[A[A[A



1/3 * Epoch (train):  23% 852/3657 [04:04<1

1/3 * Epoch (train):  24% 894/3657 [04:16<12:53,  3.57it/s, loss=0.119][A[A[A[A



1/3 * Epoch (train):  24% 894/3657 [04:16<12:53,  3.57it/s, loss=0.124][A[A[A[A



1/3 * Epoch (train):  24% 895/3657 [04:16<12:46,  3.60it/s, loss=0.124][A[A[A[A



1/3 * Epoch (train):  24% 895/3657 [04:16<12:46,  3.60it/s, loss=0.136][A[A[A[A



1/3 * Epoch (train):  25% 896/3657 [04:16<13:00,  3.54it/s, loss=0.136][A[A[A[A



1/3 * Epoch (train):  25% 896/3657 [04:16<13:00,  3.54it/s, loss=0.141][A[A[A[A



1/3 * Epoch (train):  25% 897/3657 [04:16<12:56,  3.55it/s, loss=0.141][A[A[A[A



1/3 * Epoch (train):  25% 897/3657 [04:17<12:56,  3.55it/s, loss=0.288][A[A[A[A



1/3 * Epoch (train):  25% 898/3657 [04:17<12:55,  3.56it/s, loss=0.288][A[A[A[A



1/3 * Epoch (train):  25% 898/3657 [04:17<12:55,  3.56it/s, loss=0.078][A[A[A[A



1/3 * Epoch (train):  25% 899/3657 [04:17<13:14,  3.47it/s, loss=0.078][A[A[A[A



1/3 * Epoch (train):  25% 899/3657 [04:17<1

1/3 * Epoch (train):  26% 941/3657 [04:29<13:03,  3.46it/s, loss=0.170][A[A[A[A



1/3 * Epoch (train):  26% 941/3657 [04:29<13:03,  3.46it/s, loss=0.744][A[A[A[A



1/3 * Epoch (train):  26% 942/3657 [04:29<13:09,  3.44it/s, loss=0.744][A[A[A[A



1/3 * Epoch (train):  26% 942/3657 [04:30<13:09,  3.44it/s, loss=0.040][A[A[A[A



1/3 * Epoch (train):  26% 943/3657 [04:30<12:58,  3.49it/s, loss=0.040][A[A[A[A



1/3 * Epoch (train):  26% 943/3657 [04:30<12:58,  3.49it/s, loss=0.556][A[A[A[A



1/3 * Epoch (train):  26% 944/3657 [04:30<12:45,  3.55it/s, loss=0.556][A[A[A[A



1/3 * Epoch (train):  26% 944/3657 [04:30<12:45,  3.55it/s, loss=0.068][A[A[A[A



1/3 * Epoch (train):  26% 945/3657 [04:30<12:48,  3.53it/s, loss=0.068][A[A[A[A



1/3 * Epoch (train):  26% 945/3657 [04:30<12:48,  3.53it/s, loss=0.198][A[A[A[A



1/3 * Epoch (train):  26% 946/3657 [04:30<13:01,  3.47it/s, loss=0.198][A[A[A[A



1/3 * Epoch (train):  26% 946/3657 [04:31<1

1/3 * Epoch (train):  27% 988/3657 [04:42<12:30,  3.56it/s, loss=0.492][A[A[A[A



1/3 * Epoch (train):  27% 988/3657 [04:43<12:30,  3.56it/s, loss=0.200][A[A[A[A



1/3 * Epoch (train):  27% 989/3657 [04:43<12:35,  3.53it/s, loss=0.200][A[A[A[A



1/3 * Epoch (train):  27% 989/3657 [04:43<12:35,  3.53it/s, loss=0.269][A[A[A[A



1/3 * Epoch (train):  27% 990/3657 [04:43<12:46,  3.48it/s, loss=0.269][A[A[A[A



1/3 * Epoch (train):  27% 990/3657 [04:43<12:46,  3.48it/s, loss=0.078][A[A[A[A



1/3 * Epoch (train):  27% 991/3657 [04:43<13:08,  3.38it/s, loss=0.078][A[A[A[A



1/3 * Epoch (train):  27% 991/3657 [04:44<13:08,  3.38it/s, loss=0.049][A[A[A[A



1/3 * Epoch (train):  27% 992/3657 [04:44<13:04,  3.40it/s, loss=0.049][A[A[A[A



1/3 * Epoch (train):  27% 992/3657 [04:44<13:04,  3.40it/s, loss=0.040][A[A[A[A



1/3 * Epoch (train):  27% 993/3657 [04:44<13:04,  3.40it/s, loss=0.040][A[A[A[A



1/3 * Epoch (train):  27% 993/3657 [04:44<1

1/3 * Epoch (train):  28% 1034/3657 [04:56<12:43,  3.44it/s, loss=0.072][A[A[A[A



1/3 * Epoch (train):  28% 1035/3657 [04:56<12:35,  3.47it/s, loss=0.072][A[A[A[A



1/3 * Epoch (train):  28% 1035/3657 [04:56<12:35,  3.47it/s, loss=0.036][A[A[A[A



1/3 * Epoch (train):  28% 1036/3657 [04:56<12:33,  3.48it/s, loss=0.036][A[A[A[A



1/3 * Epoch (train):  28% 1036/3657 [04:57<12:33,  3.48it/s, loss=0.122][A[A[A[A



1/3 * Epoch (train):  28% 1037/3657 [04:57<12:39,  3.45it/s, loss=0.122][A[A[A[A



1/3 * Epoch (train):  28% 1037/3657 [04:57<12:39,  3.45it/s, loss=0.067][A[A[A[A



1/3 * Epoch (train):  28% 1038/3657 [04:57<12:38,  3.45it/s, loss=0.067][A[A[A[A



1/3 * Epoch (train):  28% 1038/3657 [04:57<12:38,  3.45it/s, loss=0.604][A[A[A[A



1/3 * Epoch (train):  28% 1039/3657 [04:57<12:56,  3.37it/s, loss=0.604][A[A[A[A



1/3 * Epoch (train):  28% 1039/3657 [04:58<12:56,  3.37it/s, loss=0.028][A[A[A[A



1/3 * Epoch (train):  28% 1040/3

1/3 * Epoch (train):  30% 1081/3657 [05:09<12:12,  3.52it/s, loss=0.141][A[A[A[A



1/3 * Epoch (train):  30% 1081/3657 [05:10<12:12,  3.52it/s, loss=0.009][A[A[A[A



1/3 * Epoch (train):  30% 1082/3657 [05:10<12:06,  3.54it/s, loss=0.009][A[A[A[A



1/3 * Epoch (train):  30% 1082/3657 [05:10<12:06,  3.54it/s, loss=0.474][A[A[A[A



1/3 * Epoch (train):  30% 1083/3657 [05:10<11:57,  3.59it/s, loss=0.474][A[A[A[A



1/3 * Epoch (train):  30% 1083/3657 [05:10<11:57,  3.59it/s, loss=0.062][A[A[A[A



1/3 * Epoch (train):  30% 1084/3657 [05:10<12:01,  3.56it/s, loss=0.062][A[A[A[A



1/3 * Epoch (train):  30% 1084/3657 [05:11<12:01,  3.56it/s, loss=0.282][A[A[A[A



1/3 * Epoch (train):  30% 1085/3657 [05:11<12:01,  3.56it/s, loss=0.282][A[A[A[A



1/3 * Epoch (train):  30% 1085/3657 [05:11<12:01,  3.56it/s, loss=0.077][A[A[A[A



1/3 * Epoch (train):  30% 1086/3657 [05:11<11:57,  3.58it/s, loss=0.077][A[A[A[A



1/3 * Epoch (train):  30% 1086/3

1/3 * Epoch (train):  31% 1127/3657 [05:23<12:04,  3.49it/s, loss=0.226][A[A[A[A



1/3 * Epoch (train):  31% 1128/3657 [05:23<12:07,  3.48it/s, loss=0.226][A[A[A[A



1/3 * Epoch (train):  31% 1128/3657 [05:23<12:07,  3.48it/s, loss=0.091][A[A[A[A



1/3 * Epoch (train):  31% 1129/3657 [05:23<11:58,  3.52it/s, loss=0.091][A[A[A[A



1/3 * Epoch (train):  31% 1129/3657 [05:24<11:58,  3.52it/s, loss=0.128][A[A[A[A



1/3 * Epoch (train):  31% 1130/3657 [05:24<12:03,  3.49it/s, loss=0.128][A[A[A[A



1/3 * Epoch (train):  31% 1130/3657 [05:24<12:03,  3.49it/s, loss=0.079][A[A[A[A



1/3 * Epoch (train):  31% 1131/3657 [05:24<11:53,  3.54it/s, loss=0.079][A[A[A[A



1/3 * Epoch (train):  31% 1131/3657 [05:24<11:53,  3.54it/s, loss=0.128][A[A[A[A



1/3 * Epoch (train):  31% 1132/3657 [05:24<12:07,  3.47it/s, loss=0.128][A[A[A[A



1/3 * Epoch (train):  31% 1132/3657 [05:24<12:07,  3.47it/s, loss=0.059][A[A[A[A



1/3 * Epoch (train):  31% 1133/3

1/3 * Epoch (train):  32% 1174/3657 [05:37<11:54,  3.48it/s, loss=0.084][A[A[A[A



1/3 * Epoch (train):  32% 1174/3657 [05:37<11:54,  3.48it/s, loss=0.063][A[A[A[A



1/3 * Epoch (train):  32% 1175/3657 [05:37<12:05,  3.42it/s, loss=0.063][A[A[A[A



1/3 * Epoch (train):  32% 1175/3657 [05:37<12:05,  3.42it/s, loss=0.103][A[A[A[A



1/3 * Epoch (train):  32% 1176/3657 [05:37<12:10,  3.40it/s, loss=0.103][A[A[A[A



1/3 * Epoch (train):  32% 1176/3657 [05:37<12:10,  3.40it/s, loss=0.162][A[A[A[A



1/3 * Epoch (train):  32% 1177/3657 [05:37<12:08,  3.40it/s, loss=0.162][A[A[A[A



1/3 * Epoch (train):  32% 1177/3657 [05:38<12:08,  3.40it/s, loss=0.142][A[A[A[A



1/3 * Epoch (train):  32% 1178/3657 [05:38<12:15,  3.37it/s, loss=0.142][A[A[A[A



1/3 * Epoch (train):  32% 1178/3657 [05:38<12:15,  3.37it/s, loss=0.257][A[A[A[A



1/3 * Epoch (train):  32% 1179/3657 [05:38<12:10,  3.39it/s, loss=0.257][A[A[A[A



1/3 * Epoch (train):  32% 1179/3

1/3 * Epoch (train):  33% 1220/3657 [05:51<12:29,  3.25it/s, loss=0.141][A[A[A[A



1/3 * Epoch (train):  33% 1221/3657 [05:51<12:28,  3.26it/s, loss=0.141][A[A[A[A



1/3 * Epoch (train):  33% 1221/3657 [05:51<12:28,  3.26it/s, loss=0.137][A[A[A[A



1/3 * Epoch (train):  33% 1222/3657 [05:51<12:23,  3.28it/s, loss=0.137][A[A[A[A



1/3 * Epoch (train):  33% 1222/3657 [05:51<12:23,  3.28it/s, loss=0.103][A[A[A[A



1/3 * Epoch (train):  33% 1223/3657 [05:51<12:21,  3.28it/s, loss=0.103][A[A[A[A



1/3 * Epoch (train):  33% 1223/3657 [05:51<12:21,  3.28it/s, loss=0.127][A[A[A[A



1/3 * Epoch (train):  33% 1224/3657 [05:51<12:20,  3.29it/s, loss=0.127][A[A[A[A



1/3 * Epoch (train):  33% 1224/3657 [05:52<12:20,  3.29it/s, loss=0.040][A[A[A[A



1/3 * Epoch (train):  33% 1225/3657 [05:52<12:26,  3.26it/s, loss=0.040][A[A[A[A



1/3 * Epoch (train):  33% 1225/3657 [05:52<12:26,  3.26it/s, loss=0.171][A[A[A[A



1/3 * Epoch (train):  34% 1226/3

1/3 * Epoch (train):  35% 1267/3657 [06:04<11:37,  3.43it/s, loss=0.257][A[A[A[A



1/3 * Epoch (train):  35% 1267/3657 [06:04<11:37,  3.43it/s, loss=0.129][A[A[A[A



1/3 * Epoch (train):  35% 1268/3657 [06:04<11:21,  3.50it/s, loss=0.129][A[A[A[A



1/3 * Epoch (train):  35% 1268/3657 [06:05<11:21,  3.50it/s, loss=0.390][A[A[A[A



1/3 * Epoch (train):  35% 1269/3657 [06:05<11:36,  3.43it/s, loss=0.390][A[A[A[A



1/3 * Epoch (train):  35% 1269/3657 [06:05<11:36,  3.43it/s, loss=0.155][A[A[A[A



1/3 * Epoch (train):  35% 1270/3657 [06:05<11:30,  3.46it/s, loss=0.155][A[A[A[A



1/3 * Epoch (train):  35% 1270/3657 [06:05<11:30,  3.46it/s, loss=0.143][A[A[A[A



1/3 * Epoch (train):  35% 1271/3657 [06:05<11:16,  3.53it/s, loss=0.143][A[A[A[A



1/3 * Epoch (train):  35% 1271/3657 [06:06<11:16,  3.53it/s, loss=0.746][A[A[A[A



1/3 * Epoch (train):  35% 1272/3657 [06:06<11:12,  3.55it/s, loss=0.746][A[A[A[A



1/3 * Epoch (train):  35% 1272/3

1/3 * Epoch (train):  36% 1313/3657 [06:17<11:45,  3.32it/s, loss=0.114][A[A[A[A



1/3 * Epoch (train):  36% 1314/3657 [06:17<12:05,  3.23it/s, loss=0.114][A[A[A[A



1/3 * Epoch (train):  36% 1314/3657 [06:18<12:05,  3.23it/s, loss=0.079][A[A[A[A



1/3 * Epoch (train):  36% 1315/3657 [06:18<12:03,  3.24it/s, loss=0.079][A[A[A[A



1/3 * Epoch (train):  36% 1315/3657 [06:18<12:03,  3.24it/s, loss=0.112][A[A[A[A



1/3 * Epoch (train):  36% 1316/3657 [06:18<12:08,  3.21it/s, loss=0.112][A[A[A[A



1/3 * Epoch (train):  36% 1316/3657 [06:18<12:08,  3.21it/s, loss=1.240][A[A[A[A



1/3 * Epoch (train):  36% 1317/3657 [06:18<11:55,  3.27it/s, loss=1.240][A[A[A[A



1/3 * Epoch (train):  36% 1317/3657 [06:19<11:55,  3.27it/s, loss=0.106][A[A[A[A



1/3 * Epoch (train):  36% 1318/3657 [06:19<11:38,  3.35it/s, loss=0.106][A[A[A[A



1/3 * Epoch (train):  36% 1318/3657 [06:19<11:38,  3.35it/s, loss=0.081][A[A[A[A



1/3 * Epoch (train):  36% 1319/3

1/3 * Epoch (train):  37% 1360/3657 [06:30<11:10,  3.43it/s, loss=0.069][A[A[A[A



1/3 * Epoch (train):  37% 1360/3657 [06:31<11:10,  3.43it/s, loss=0.092][A[A[A[A



1/3 * Epoch (train):  37% 1361/3657 [06:31<11:26,  3.34it/s, loss=0.092][A[A[A[A



1/3 * Epoch (train):  37% 1361/3657 [06:31<11:26,  3.34it/s, loss=0.050][A[A[A[A



1/3 * Epoch (train):  37% 1362/3657 [06:31<11:26,  3.34it/s, loss=0.050][A[A[A[A



1/3 * Epoch (train):  37% 1362/3657 [06:31<11:26,  3.34it/s, loss=0.505][A[A[A[A



1/3 * Epoch (train):  37% 1363/3657 [06:31<11:29,  3.33it/s, loss=0.505][A[A[A[A



1/3 * Epoch (train):  37% 1363/3657 [06:32<11:29,  3.33it/s, loss=0.061][A[A[A[A



1/3 * Epoch (train):  37% 1364/3657 [06:32<11:24,  3.35it/s, loss=0.061][A[A[A[A



1/3 * Epoch (train):  37% 1364/3657 [06:32<11:24,  3.35it/s, loss=0.298][A[A[A[A



1/3 * Epoch (train):  37% 1365/3657 [06:32<11:13,  3.40it/s, loss=0.298][A[A[A[A



1/3 * Epoch (train):  37% 1365/3

1/3 * Epoch (train):  38% 1406/3657 [06:44<11:25,  3.28it/s, loss=0.073][A[A[A[A



1/3 * Epoch (train):  38% 1407/3657 [06:44<11:07,  3.37it/s, loss=0.073][A[A[A[A



1/3 * Epoch (train):  38% 1407/3657 [06:44<11:07,  3.37it/s, loss=0.320][A[A[A[A



1/3 * Epoch (train):  39% 1408/3657 [06:44<10:57,  3.42it/s, loss=0.320][A[A[A[A



1/3 * Epoch (train):  39% 1408/3657 [06:45<10:57,  3.42it/s, loss=0.047][A[A[A[A



1/3 * Epoch (train):  39% 1409/3657 [06:45<10:44,  3.49it/s, loss=0.047][A[A[A[A



1/3 * Epoch (train):  39% 1409/3657 [06:45<10:44,  3.49it/s, loss=0.064][A[A[A[A



1/3 * Epoch (train):  39% 1410/3657 [06:45<10:27,  3.58it/s, loss=0.064][A[A[A[A



1/3 * Epoch (train):  39% 1410/3657 [06:45<10:27,  3.58it/s, loss=0.090][A[A[A[A



1/3 * Epoch (train):  39% 1411/3657 [06:45<10:19,  3.62it/s, loss=0.090][A[A[A[A



1/3 * Epoch (train):  39% 1411/3657 [06:46<10:19,  3.62it/s, loss=0.228][A[A[A[A



1/3 * Epoch (train):  39% 1412/3

1/3 * Epoch (train):  40% 1453/3657 [06:57<09:56,  3.70it/s, loss=0.068][A[A[A[A



1/3 * Epoch (train):  40% 1453/3657 [06:58<09:56,  3.70it/s, loss=0.117][A[A[A[A



1/3 * Epoch (train):  40% 1454/3657 [06:58<09:57,  3.69it/s, loss=0.117][A[A[A[A



1/3 * Epoch (train):  40% 1454/3657 [06:58<09:57,  3.69it/s, loss=0.175][A[A[A[A



1/3 * Epoch (train):  40% 1455/3657 [06:58<09:55,  3.70it/s, loss=0.175][A[A[A[A



1/3 * Epoch (train):  40% 1455/3657 [06:58<09:55,  3.70it/s, loss=0.099][A[A[A[A



1/3 * Epoch (train):  40% 1456/3657 [06:58<09:52,  3.71it/s, loss=0.099][A[A[A[A



1/3 * Epoch (train):  40% 1456/3657 [06:58<09:52,  3.71it/s, loss=0.586][A[A[A[A



1/3 * Epoch (train):  40% 1457/3657 [06:58<09:49,  3.73it/s, loss=0.586][A[A[A[A



1/3 * Epoch (train):  40% 1457/3657 [06:59<09:49,  3.73it/s, loss=0.514][A[A[A[A



1/3 * Epoch (train):  40% 1458/3657 [06:59<09:51,  3.72it/s, loss=0.514][A[A[A[A



1/3 * Epoch (train):  40% 1458/3

1/3 * Epoch (train):  41% 1499/3657 [07:11<10:31,  3.42it/s, loss=0.218][A[A[A[A



1/3 * Epoch (train):  41% 1500/3657 [07:11<10:29,  3.43it/s, loss=0.218][A[A[A[A



1/3 * Epoch (train):  41% 1500/3657 [07:11<10:29,  3.43it/s, loss=0.030][A[A[A[A



1/3 * Epoch (train):  41% 1501/3657 [07:11<10:32,  3.41it/s, loss=0.030][A[A[A[A



1/3 * Epoch (train):  41% 1501/3657 [07:11<10:32,  3.41it/s, loss=0.041][A[A[A[A



1/3 * Epoch (train):  41% 1502/3657 [07:11<10:13,  3.51it/s, loss=0.041][A[A[A[A



1/3 * Epoch (train):  41% 1502/3657 [07:12<10:13,  3.51it/s, loss=0.115][A[A[A[A



1/3 * Epoch (train):  41% 1503/3657 [07:12<10:18,  3.48it/s, loss=0.115][A[A[A[A



1/3 * Epoch (train):  41% 1503/3657 [07:12<10:18,  3.48it/s, loss=0.057][A[A[A[A



1/3 * Epoch (train):  41% 1504/3657 [07:12<10:23,  3.45it/s, loss=0.057][A[A[A[A



1/3 * Epoch (train):  41% 1504/3657 [07:12<10:23,  3.45it/s, loss=0.179][A[A[A[A



1/3 * Epoch (train):  41% 1505/3

1/3 * Epoch (train):  42% 1546/3657 [07:24<10:14,  3.44it/s, loss=0.115][A[A[A[A



1/3 * Epoch (train):  42% 1546/3657 [07:24<10:14,  3.44it/s, loss=7.889][A[A[A[A



1/3 * Epoch (train):  42% 1547/3657 [07:24<10:05,  3.49it/s, loss=7.889][A[A[A[A



1/3 * Epoch (train):  42% 1547/3657 [07:24<10:05,  3.49it/s, loss=0.174][A[A[A[A



1/3 * Epoch (train):  42% 1548/3657 [07:24<10:29,  3.35it/s, loss=0.174][A[A[A[A



1/3 * Epoch (train):  42% 1548/3657 [07:25<10:29,  3.35it/s, loss=0.230][A[A[A[A



1/3 * Epoch (train):  42% 1549/3657 [07:25<10:39,  3.29it/s, loss=0.230][A[A[A[A



1/3 * Epoch (train):  42% 1549/3657 [07:25<10:39,  3.29it/s, loss=0.146][A[A[A[A



1/3 * Epoch (train):  42% 1550/3657 [07:25<10:21,  3.39it/s, loss=0.146][A[A[A[A



1/3 * Epoch (train):  42% 1550/3657 [07:25<10:21,  3.39it/s, loss=0.110][A[A[A[A



1/3 * Epoch (train):  42% 1551/3657 [07:25<10:16,  3.42it/s, loss=0.110][A[A[A[A



1/3 * Epoch (train):  42% 1551/3

1/3 * Epoch (train):  44% 1592/3657 [07:37<09:31,  3.62it/s, loss=0.103][A[A[A[A



1/3 * Epoch (train):  44% 1593/3657 [07:37<09:26,  3.64it/s, loss=0.103][A[A[A[A



1/3 * Epoch (train):  44% 1593/3657 [07:38<09:26,  3.64it/s, loss=0.080][A[A[A[A



1/3 * Epoch (train):  44% 1594/3657 [07:38<09:30,  3.62it/s, loss=0.080][A[A[A[A



1/3 * Epoch (train):  44% 1594/3657 [07:38<09:30,  3.62it/s, loss=0.080][A[A[A[A



1/3 * Epoch (train):  44% 1595/3657 [07:38<09:23,  3.66it/s, loss=0.080][A[A[A[A



1/3 * Epoch (train):  44% 1595/3657 [07:38<09:23,  3.66it/s, loss=0.132][A[A[A[A



1/3 * Epoch (train):  44% 1596/3657 [07:38<09:25,  3.65it/s, loss=0.132][A[A[A[A



1/3 * Epoch (train):  44% 1596/3657 [07:38<09:25,  3.65it/s, loss=0.045][A[A[A[A



1/3 * Epoch (train):  44% 1597/3657 [07:38<09:25,  3.64it/s, loss=0.045][A[A[A[A



1/3 * Epoch (train):  44% 1597/3657 [07:39<09:25,  3.64it/s, loss=0.077][A[A[A[A



1/3 * Epoch (train):  44% 1598/3

1/3 * Epoch (train):  45% 1639/3657 [07:50<09:32,  3.53it/s, loss=0.170][A[A[A[A



1/3 * Epoch (train):  45% 1639/3657 [07:50<09:32,  3.53it/s, loss=0.531][A[A[A[A



1/3 * Epoch (train):  45% 1640/3657 [07:50<09:22,  3.59it/s, loss=0.531][A[A[A[A



1/3 * Epoch (train):  45% 1640/3657 [07:51<09:22,  3.59it/s, loss=0.113][A[A[A[A



1/3 * Epoch (train):  45% 1641/3657 [07:51<09:17,  3.62it/s, loss=0.113][A[A[A[A



1/3 * Epoch (train):  45% 1641/3657 [07:51<09:17,  3.62it/s, loss=0.202][A[A[A[A



1/3 * Epoch (train):  45% 1642/3657 [07:51<09:13,  3.64it/s, loss=0.202][A[A[A[A



1/3 * Epoch (train):  45% 1642/3657 [07:51<09:13,  3.64it/s, loss=0.107][A[A[A[A



1/3 * Epoch (train):  45% 1643/3657 [07:51<09:12,  3.64it/s, loss=0.107][A[A[A[A



1/3 * Epoch (train):  45% 1643/3657 [07:52<09:12,  3.64it/s, loss=0.207][A[A[A[A



1/3 * Epoch (train):  45% 1644/3657 [07:52<09:18,  3.60it/s, loss=0.207][A[A[A[A



1/3 * Epoch (train):  45% 1644/3

1/3 * Epoch (train):  46% 1685/3657 [08:04<09:48,  3.35it/s, loss=5.134][A[A[A[A



1/3 * Epoch (train):  46% 1686/3657 [08:04<09:48,  3.35it/s, loss=5.134][A[A[A[A



1/3 * Epoch (train):  46% 1686/3657 [08:04<09:48,  3.35it/s, loss=0.104][A[A[A[A



1/3 * Epoch (train):  46% 1687/3657 [08:04<10:02,  3.27it/s, loss=0.104][A[A[A[A



1/3 * Epoch (train):  46% 1687/3657 [08:04<10:02,  3.27it/s, loss=0.059][A[A[A[A



1/3 * Epoch (train):  46% 1688/3657 [08:04<10:11,  3.22it/s, loss=0.059][A[A[A[A



1/3 * Epoch (train):  46% 1688/3657 [08:05<10:11,  3.22it/s, loss=0.031][A[A[A[A



1/3 * Epoch (train):  46% 1689/3657 [08:05<10:06,  3.24it/s, loss=0.031][A[A[A[A



1/3 * Epoch (train):  46% 1689/3657 [08:05<10:06,  3.24it/s, loss=0.125][A[A[A[A



1/3 * Epoch (train):  46% 1690/3657 [08:05<09:58,  3.29it/s, loss=0.125][A[A[A[A



1/3 * Epoch (train):  46% 1690/3657 [08:05<09:58,  3.29it/s, loss=0.102][A[A[A[A



1/3 * Epoch (train):  46% 1691/3

1/3 * Epoch (train):  47% 1732/3657 [08:18<09:33,  3.36it/s, loss=0.086][A[A[A[A



1/3 * Epoch (train):  47% 1732/3657 [08:18<09:33,  3.36it/s, loss=0.247][A[A[A[A



1/3 * Epoch (train):  47% 1733/3657 [08:18<09:35,  3.34it/s, loss=0.247][A[A[A[A



1/3 * Epoch (train):  47% 1733/3657 [08:18<09:35,  3.34it/s, loss=0.199][A[A[A[A



1/3 * Epoch (train):  47% 1734/3657 [08:18<09:27,  3.39it/s, loss=0.199][A[A[A[A



1/3 * Epoch (train):  47% 1734/3657 [08:18<09:27,  3.39it/s, loss=0.054][A[A[A[A



1/3 * Epoch (train):  47% 1735/3657 [08:18<09:24,  3.41it/s, loss=0.054][A[A[A[A



1/3 * Epoch (train):  47% 1735/3657 [08:19<09:24,  3.41it/s, loss=2.025][A[A[A[A



1/3 * Epoch (train):  47% 1736/3657 [08:19<09:25,  3.40it/s, loss=2.025][A[A[A[A



1/3 * Epoch (train):  47% 1736/3657 [08:19<09:25,  3.40it/s, loss=0.164][A[A[A[A



1/3 * Epoch (train):  47% 1737/3657 [08:19<09:29,  3.37it/s, loss=0.164][A[A[A[A



1/3 * Epoch (train):  47% 1737/3

1/3 * Epoch (train):  49% 1778/3657 [08:31<09:17,  3.37it/s, loss=0.142][A[A[A[A



1/3 * Epoch (train):  49% 1779/3657 [08:31<09:17,  3.37it/s, loss=0.142][A[A[A[A



1/3 * Epoch (train):  49% 1779/3657 [08:32<09:17,  3.37it/s, loss=0.120][A[A[A[A



1/3 * Epoch (train):  49% 1780/3657 [08:32<09:09,  3.42it/s, loss=0.120][A[A[A[A



1/3 * Epoch (train):  49% 1780/3657 [08:32<09:09,  3.42it/s, loss=0.104][A[A[A[A



1/3 * Epoch (train):  49% 1781/3657 [08:32<09:05,  3.44it/s, loss=0.104][A[A[A[A



1/3 * Epoch (train):  49% 1781/3657 [08:32<09:05,  3.44it/s, loss=0.121][A[A[A[A



1/3 * Epoch (train):  49% 1782/3657 [08:32<09:08,  3.42it/s, loss=0.121][A[A[A[A



1/3 * Epoch (train):  49% 1782/3657 [08:33<09:08,  3.42it/s, loss=0.126][A[A[A[A



1/3 * Epoch (train):  49% 1783/3657 [08:33<08:59,  3.47it/s, loss=0.126][A[A[A[A



1/3 * Epoch (train):  49% 1783/3657 [08:33<08:59,  3.47it/s, loss=0.022][A[A[A[A



1/3 * Epoch (train):  49% 1784/3

1/3 * Epoch (train):  50% 1825/3657 [08:45<08:40,  3.52it/s, loss=0.101][A[A[A[A



1/3 * Epoch (train):  50% 1825/3657 [08:45<08:40,  3.52it/s, loss=0.050][A[A[A[A



1/3 * Epoch (train):  50% 1826/3657 [08:45<08:37,  3.54it/s, loss=0.050][A[A[A[A



1/3 * Epoch (train):  50% 1826/3657 [08:45<08:37,  3.54it/s, loss=18.349][A[A[A[A



1/3 * Epoch (train):  50% 1827/3657 [08:45<08:28,  3.60it/s, loss=18.349][A[A[A[A



1/3 * Epoch (train):  50% 1827/3657 [08:46<08:28,  3.60it/s, loss=0.088] [A[A[A[A



1/3 * Epoch (train):  50% 1828/3657 [08:46<08:29,  3.59it/s, loss=0.088][A[A[A[A



1/3 * Epoch (train):  50% 1828/3657 [08:46<08:29,  3.59it/s, loss=0.172][A[A[A[A



1/3 * Epoch (train):  50% 1829/3657 [08:46<08:24,  3.62it/s, loss=0.172][A[A[A[A



1/3 * Epoch (train):  50% 1829/3657 [08:46<08:24,  3.62it/s, loss=0.075][A[A[A[A



1/3 * Epoch (train):  50% 1830/3657 [08:46<08:33,  3.56it/s, loss=0.075][A[A[A[A



1/3 * Epoch (train):  50% 183

1/3 * Epoch (train):  51% 1871/3657 [08:58<08:15,  3.60it/s, loss=0.108][A[A[A[A



1/3 * Epoch (train):  51% 1872/3657 [08:58<08:31,  3.49it/s, loss=0.108][A[A[A[A



1/3 * Epoch (train):  51% 1872/3657 [08:59<08:31,  3.49it/s, loss=0.872][A[A[A[A



1/3 * Epoch (train):  51% 1873/3657 [08:59<08:31,  3.49it/s, loss=0.872][A[A[A[A



1/3 * Epoch (train):  51% 1873/3657 [08:59<08:31,  3.49it/s, loss=0.117][A[A[A[A



1/3 * Epoch (train):  51% 1874/3657 [08:59<08:39,  3.43it/s, loss=0.117][A[A[A[A



1/3 * Epoch (train):  51% 1874/3657 [08:59<08:39,  3.43it/s, loss=0.242][A[A[A[A



1/3 * Epoch (train):  51% 1875/3657 [08:59<08:51,  3.35it/s, loss=0.242][A[A[A[A



1/3 * Epoch (train):  51% 1875/3657 [09:00<08:51,  3.35it/s, loss=0.088][A[A[A[A



1/3 * Epoch (train):  51% 1876/3657 [09:00<08:53,  3.34it/s, loss=0.088][A[A[A[A



1/3 * Epoch (train):  51% 1876/3657 [09:00<08:53,  3.34it/s, loss=0.131][A[A[A[A



1/3 * Epoch (train):  51% 1877/3

1/3 * Epoch (train):  52% 1918/3657 [09:12<08:37,  3.36it/s, loss=0.197][A[A[A[A



1/3 * Epoch (train):  52% 1918/3657 [09:12<08:37,  3.36it/s, loss=0.060][A[A[A[A



1/3 * Epoch (train):  52% 1919/3657 [09:12<08:45,  3.30it/s, loss=0.060][A[A[A[A



1/3 * Epoch (train):  52% 1919/3657 [09:12<08:45,  3.30it/s, loss=0.255][A[A[A[A



1/3 * Epoch (train):  53% 1920/3657 [09:12<08:28,  3.42it/s, loss=0.255][A[A[A[A



1/3 * Epoch (train):  53% 1920/3657 [09:12<08:28,  3.42it/s, loss=0.217][A[A[A[A



1/3 * Epoch (train):  53% 1921/3657 [09:12<08:24,  3.44it/s, loss=0.217][A[A[A[A



1/3 * Epoch (train):  53% 1921/3657 [09:13<08:24,  3.44it/s, loss=0.019][A[A[A[A



1/3 * Epoch (train):  53% 1922/3657 [09:13<08:26,  3.43it/s, loss=0.019][A[A[A[A



1/3 * Epoch (train):  53% 1922/3657 [09:13<08:26,  3.43it/s, loss=0.090][A[A[A[A



1/3 * Epoch (train):  53% 1923/3657 [09:13<08:24,  3.44it/s, loss=0.090][A[A[A[A



1/3 * Epoch (train):  53% 1923/3

1/3 * Epoch (train):  54% 1964/3657 [09:25<08:21,  3.38it/s, loss=0.486][A[A[A[A



1/3 * Epoch (train):  54% 1965/3657 [09:25<08:18,  3.39it/s, loss=0.486][A[A[A[A



1/3 * Epoch (train):  54% 1965/3657 [09:26<08:18,  3.39it/s, loss=0.187][A[A[A[A



1/3 * Epoch (train):  54% 1966/3657 [09:26<08:13,  3.43it/s, loss=0.187][A[A[A[A



1/3 * Epoch (train):  54% 1966/3657 [09:26<08:13,  3.43it/s, loss=0.083][A[A[A[A



1/3 * Epoch (train):  54% 1967/3657 [09:26<08:10,  3.45it/s, loss=0.083][A[A[A[A



1/3 * Epoch (train):  54% 1967/3657 [09:26<08:10,  3.45it/s, loss=0.420][A[A[A[A



1/3 * Epoch (train):  54% 1968/3657 [09:26<08:15,  3.41it/s, loss=0.420][A[A[A[A



1/3 * Epoch (train):  54% 1968/3657 [09:27<08:15,  3.41it/s, loss=1.109][A[A[A[A



1/3 * Epoch (train):  54% 1969/3657 [09:27<08:12,  3.43it/s, loss=1.109][A[A[A[A



1/3 * Epoch (train):  54% 1969/3657 [09:27<08:12,  3.43it/s, loss=0.089][A[A[A[A



1/3 * Epoch (train):  54% 1970/3

1/3 * Epoch (train):  55% 2011/3657 [09:39<08:07,  3.38it/s, loss=0.113][A[A[A[A



1/3 * Epoch (train):  55% 2011/3657 [09:39<08:07,  3.38it/s, loss=0.032][A[A[A[A



1/3 * Epoch (train):  55% 2012/3657 [09:39<08:03,  3.40it/s, loss=0.032][A[A[A[A



1/3 * Epoch (train):  55% 2012/3657 [09:39<08:03,  3.40it/s, loss=0.192][A[A[A[A



1/3 * Epoch (train):  55% 2013/3657 [09:39<07:54,  3.47it/s, loss=0.192][A[A[A[A



1/3 * Epoch (train):  55% 2013/3657 [09:40<07:54,  3.47it/s, loss=0.094][A[A[A[A



1/3 * Epoch (train):  55% 2014/3657 [09:40<07:44,  3.54it/s, loss=0.094][A[A[A[A



1/3 * Epoch (train):  55% 2014/3657 [09:40<07:44,  3.54it/s, loss=0.097][A[A[A[A



1/3 * Epoch (train):  55% 2015/3657 [09:40<07:36,  3.59it/s, loss=0.097][A[A[A[A



1/3 * Epoch (train):  55% 2015/3657 [09:40<07:36,  3.59it/s, loss=0.302][A[A[A[A



1/3 * Epoch (train):  55% 2016/3657 [09:40<07:40,  3.57it/s, loss=0.302][A[A[A[A



1/3 * Epoch (train):  55% 2016/3

1/3 * Epoch (train):  56% 2057/3657 [09:53<07:48,  3.41it/s, loss=0.276][A[A[A[A



1/3 * Epoch (train):  56% 2058/3657 [09:53<07:48,  3.41it/s, loss=0.276][A[A[A[A



1/3 * Epoch (train):  56% 2058/3657 [09:53<07:48,  3.41it/s, loss=0.067][A[A[A[A



1/3 * Epoch (train):  56% 2059/3657 [09:53<07:41,  3.46it/s, loss=0.067][A[A[A[A



1/3 * Epoch (train):  56% 2059/3657 [09:53<07:41,  3.46it/s, loss=0.126][A[A[A[A



1/3 * Epoch (train):  56% 2060/3657 [09:53<07:40,  3.47it/s, loss=0.126][A[A[A[A



1/3 * Epoch (train):  56% 2060/3657 [09:54<07:40,  3.47it/s, loss=0.122][A[A[A[A



1/3 * Epoch (train):  56% 2061/3657 [09:54<07:43,  3.44it/s, loss=0.122][A[A[A[A



1/3 * Epoch (train):  56% 2061/3657 [09:54<07:43,  3.44it/s, loss=0.139][A[A[A[A



1/3 * Epoch (train):  56% 2062/3657 [09:54<07:50,  3.39it/s, loss=0.139][A[A[A[A



1/3 * Epoch (train):  56% 2062/3657 [09:54<07:50,  3.39it/s, loss=0.080][A[A[A[A



1/3 * Epoch (train):  56% 2063/3

1/3 * Epoch (train):  58% 2104/3657 [10:06<07:37,  3.40it/s, loss=0.117][A[A[A[A



1/3 * Epoch (train):  58% 2104/3657 [10:07<07:37,  3.40it/s, loss=0.070][A[A[A[A



1/3 * Epoch (train):  58% 2105/3657 [10:07<07:48,  3.31it/s, loss=0.070][A[A[A[A



1/3 * Epoch (train):  58% 2105/3657 [10:07<07:48,  3.31it/s, loss=0.140][A[A[A[A



1/3 * Epoch (train):  58% 2106/3657 [10:07<07:41,  3.36it/s, loss=0.140][A[A[A[A



1/3 * Epoch (train):  58% 2106/3657 [10:07<07:41,  3.36it/s, loss=0.214][A[A[A[A



1/3 * Epoch (train):  58% 2107/3657 [10:07<07:42,  3.35it/s, loss=0.214][A[A[A[A



1/3 * Epoch (train):  58% 2107/3657 [10:07<07:42,  3.35it/s, loss=0.072][A[A[A[A



1/3 * Epoch (train):  58% 2108/3657 [10:07<07:49,  3.30it/s, loss=0.072][A[A[A[A



1/3 * Epoch (train):  58% 2108/3657 [10:08<07:49,  3.30it/s, loss=0.104][A[A[A[A



1/3 * Epoch (train):  58% 2109/3657 [10:08<07:36,  3.39it/s, loss=0.104][A[A[A[A



1/3 * Epoch (train):  58% 2109/3

1/3 * Epoch (train):  59% 2150/3657 [10:20<06:56,  3.62it/s, loss=0.240][A[A[A[A



1/3 * Epoch (train):  59% 2151/3657 [10:20<07:06,  3.53it/s, loss=0.240][A[A[A[A



1/3 * Epoch (train):  59% 2151/3657 [10:20<07:06,  3.53it/s, loss=0.125][A[A[A[A



1/3 * Epoch (train):  59% 2152/3657 [10:20<07:12,  3.48it/s, loss=0.125][A[A[A[A



1/3 * Epoch (train):  59% 2152/3657 [10:21<07:12,  3.48it/s, loss=0.140][A[A[A[A



1/3 * Epoch (train):  59% 2153/3657 [10:21<07:07,  3.52it/s, loss=0.140][A[A[A[A



1/3 * Epoch (train):  59% 2153/3657 [10:21<07:07,  3.52it/s, loss=0.375][A[A[A[A



1/3 * Epoch (train):  59% 2154/3657 [10:21<07:05,  3.53it/s, loss=0.375][A[A[A[A



1/3 * Epoch (train):  59% 2154/3657 [10:21<07:05,  3.53it/s, loss=0.216][A[A[A[A



1/3 * Epoch (train):  59% 2155/3657 [10:21<07:04,  3.54it/s, loss=0.216][A[A[A[A



1/3 * Epoch (train):  59% 2155/3657 [10:21<07:04,  3.54it/s, loss=0.126][A[A[A[A



1/3 * Epoch (train):  59% 2156/3

1/3 * Epoch (train):  60% 2197/3657 [10:33<06:53,  3.53it/s, loss=0.232][A[A[A[A



1/3 * Epoch (train):  60% 2197/3657 [10:33<06:53,  3.53it/s, loss=0.054][A[A[A[A



1/3 * Epoch (train):  60% 2198/3657 [10:33<06:58,  3.49it/s, loss=0.054][A[A[A[A



1/3 * Epoch (train):  60% 2198/3657 [10:33<06:58,  3.49it/s, loss=0.251][A[A[A[A



1/3 * Epoch (train):  60% 2199/3657 [10:33<06:53,  3.53it/s, loss=0.251][A[A[A[A



1/3 * Epoch (train):  60% 2199/3657 [10:34<06:53,  3.53it/s, loss=0.038][A[A[A[A



1/3 * Epoch (train):  60% 2200/3657 [10:34<06:50,  3.55it/s, loss=0.038][A[A[A[A



1/3 * Epoch (train):  60% 2200/3657 [10:34<06:50,  3.55it/s, loss=0.161][A[A[A[A



1/3 * Epoch (train):  60% 2201/3657 [10:34<06:49,  3.55it/s, loss=0.161][A[A[A[A



1/3 * Epoch (train):  60% 2201/3657 [10:34<06:49,  3.55it/s, loss=0.302][A[A[A[A



1/3 * Epoch (train):  60% 2202/3657 [10:34<06:42,  3.61it/s, loss=0.302][A[A[A[A



1/3 * Epoch (train):  60% 2202/3

1/3 * Epoch (train):  61% 2243/3657 [10:46<06:55,  3.40it/s, loss=0.183][A[A[A[A



1/3 * Epoch (train):  61% 2244/3657 [10:46<07:01,  3.35it/s, loss=0.183][A[A[A[A



1/3 * Epoch (train):  61% 2244/3657 [10:46<07:01,  3.35it/s, loss=0.135][A[A[A[A



1/3 * Epoch (train):  61% 2245/3657 [10:46<06:59,  3.37it/s, loss=0.135][A[A[A[A



1/3 * Epoch (train):  61% 2245/3657 [10:46<06:59,  3.37it/s, loss=0.044][A[A[A[A



1/3 * Epoch (train):  61% 2246/3657 [10:46<06:51,  3.43it/s, loss=0.044][A[A[A[A



1/3 * Epoch (train):  61% 2246/3657 [10:47<06:51,  3.43it/s, loss=0.368][A[A[A[A



1/3 * Epoch (train):  61% 2247/3657 [10:47<06:54,  3.40it/s, loss=0.368][A[A[A[A



1/3 * Epoch (train):  61% 2247/3657 [10:47<06:54,  3.40it/s, loss=0.106][A[A[A[A



1/3 * Epoch (train):  61% 2248/3657 [10:47<07:03,  3.33it/s, loss=0.106][A[A[A[A



1/3 * Epoch (train):  61% 2248/3657 [10:47<07:03,  3.33it/s, loss=0.329][A[A[A[A



1/3 * Epoch (train):  61% 2249/3

1/3 * Epoch (train):  63% 2290/3657 [10:59<06:33,  3.48it/s, loss=0.226][A[A[A[A



1/3 * Epoch (train):  63% 2290/3657 [10:59<06:33,  3.48it/s, loss=0.164][A[A[A[A



1/3 * Epoch (train):  63% 2291/3657 [10:59<06:53,  3.31it/s, loss=0.164][A[A[A[A



1/3 * Epoch (train):  63% 2291/3657 [11:00<06:53,  3.31it/s, loss=0.249][A[A[A[A



1/3 * Epoch (train):  63% 2292/3657 [11:00<06:48,  3.34it/s, loss=0.249][A[A[A[A



1/3 * Epoch (train):  63% 2292/3657 [11:00<06:48,  3.34it/s, loss=0.296][A[A[A[A



1/3 * Epoch (train):  63% 2293/3657 [11:00<06:42,  3.38it/s, loss=0.296][A[A[A[A



1/3 * Epoch (train):  63% 2293/3657 [11:00<06:42,  3.38it/s, loss=0.083][A[A[A[A



1/3 * Epoch (train):  63% 2294/3657 [11:00<06:41,  3.40it/s, loss=0.083][A[A[A[A



1/3 * Epoch (train):  63% 2294/3657 [11:01<06:41,  3.40it/s, loss=0.605][A[A[A[A



1/3 * Epoch (train):  63% 2295/3657 [11:01<06:46,  3.35it/s, loss=0.605][A[A[A[A



1/3 * Epoch (train):  63% 2295/3

1/3 * Epoch (train):  64% 2336/3657 [11:13<06:00,  3.66it/s, loss=2.391][A[A[A[A



1/3 * Epoch (train):  64% 2337/3657 [11:13<05:57,  3.69it/s, loss=2.391][A[A[A[A



1/3 * Epoch (train):  64% 2337/3657 [11:13<05:57,  3.69it/s, loss=0.076][A[A[A[A



1/3 * Epoch (train):  64% 2338/3657 [11:13<06:12,  3.54it/s, loss=0.076][A[A[A[A



1/3 * Epoch (train):  64% 2338/3657 [11:13<06:12,  3.54it/s, loss=0.071][A[A[A[A



1/3 * Epoch (train):  64% 2339/3657 [11:13<06:16,  3.50it/s, loss=0.071][A[A[A[A



1/3 * Epoch (train):  64% 2339/3657 [11:14<06:16,  3.50it/s, loss=0.071][A[A[A[A



1/3 * Epoch (train):  64% 2340/3657 [11:14<06:27,  3.40it/s, loss=0.071][A[A[A[A



1/3 * Epoch (train):  64% 2340/3657 [11:14<06:27,  3.40it/s, loss=0.431][A[A[A[A



1/3 * Epoch (train):  64% 2341/3657 [11:14<06:36,  3.32it/s, loss=0.431][A[A[A[A



1/3 * Epoch (train):  64% 2341/3657 [11:14<06:36,  3.32it/s, loss=0.058][A[A[A[A



1/3 * Epoch (train):  64% 2342/3

1/3 * Epoch (train):  65% 2383/3657 [11:26<06:04,  3.50it/s, loss=0.065][A[A[A[A



1/3 * Epoch (train):  65% 2383/3657 [11:27<06:04,  3.50it/s, loss=0.103][A[A[A[A



1/3 * Epoch (train):  65% 2384/3657 [11:27<06:01,  3.52it/s, loss=0.103][A[A[A[A



1/3 * Epoch (train):  65% 2384/3657 [11:27<06:01,  3.52it/s, loss=4.927][A[A[A[A



1/3 * Epoch (train):  65% 2385/3657 [11:27<06:03,  3.50it/s, loss=4.927][A[A[A[A



1/3 * Epoch (train):  65% 2385/3657 [11:27<06:03,  3.50it/s, loss=0.063][A[A[A[A



1/3 * Epoch (train):  65% 2386/3657 [11:27<05:55,  3.58it/s, loss=0.063][A[A[A[A



1/3 * Epoch (train):  65% 2386/3657 [11:27<05:55,  3.58it/s, loss=0.113][A[A[A[A



1/3 * Epoch (train):  65% 2387/3657 [11:27<05:49,  3.63it/s, loss=0.113][A[A[A[A



1/3 * Epoch (train):  65% 2387/3657 [11:28<05:49,  3.63it/s, loss=0.108][A[A[A[A



1/3 * Epoch (train):  65% 2388/3657 [11:28<05:46,  3.66it/s, loss=0.108][A[A[A[A



1/3 * Epoch (train):  65% 2388/3

1/3 * Epoch (train):  66% 2429/3657 [11:40<05:56,  3.44it/s, loss=0.167][A[A[A[A



1/3 * Epoch (train):  66% 2430/3657 [11:40<05:59,  3.42it/s, loss=0.167][A[A[A[A



1/3 * Epoch (train):  66% 2430/3657 [11:40<05:59,  3.42it/s, loss=0.433][A[A[A[A



1/3 * Epoch (train):  66% 2431/3657 [11:40<05:55,  3.44it/s, loss=0.433][A[A[A[A



1/3 * Epoch (train):  66% 2431/3657 [11:40<05:55,  3.44it/s, loss=0.216][A[A[A[A



1/3 * Epoch (train):  67% 2432/3657 [11:40<05:53,  3.47it/s, loss=0.216][A[A[A[A



1/3 * Epoch (train):  67% 2432/3657 [11:40<05:53,  3.47it/s, loss=0.027][A[A[A[A



1/3 * Epoch (train):  67% 2433/3657 [11:40<05:48,  3.51it/s, loss=0.027][A[A[A[A



1/3 * Epoch (train):  67% 2433/3657 [11:41<05:48,  3.51it/s, loss=0.263][A[A[A[A



1/3 * Epoch (train):  67% 2434/3657 [11:41<05:52,  3.47it/s, loss=0.263][A[A[A[A



1/3 * Epoch (train):  67% 2434/3657 [11:41<05:52,  3.47it/s, loss=0.265][A[A[A[A



1/3 * Epoch (train):  67% 2435/3

1/3 * Epoch (train):  68% 2476/3657 [11:53<05:59,  3.29it/s, loss=0.057][A[A[A[A



1/3 * Epoch (train):  68% 2476/3657 [11:53<05:59,  3.29it/s, loss=0.047][A[A[A[A



1/3 * Epoch (train):  68% 2477/3657 [11:53<05:59,  3.28it/s, loss=0.047][A[A[A[A



1/3 * Epoch (train):  68% 2477/3657 [11:54<05:59,  3.28it/s, loss=0.228][A[A[A[A



1/3 * Epoch (train):  68% 2478/3657 [11:54<05:58,  3.28it/s, loss=0.228][A[A[A[A



1/3 * Epoch (train):  68% 2478/3657 [11:54<05:58,  3.28it/s, loss=1.130][A[A[A[A



1/3 * Epoch (train):  68% 2479/3657 [11:54<06:00,  3.27it/s, loss=1.130][A[A[A[A



1/3 * Epoch (train):  68% 2479/3657 [11:54<06:00,  3.27it/s, loss=0.117][A[A[A[A



1/3 * Epoch (train):  68% 2480/3657 [11:54<06:02,  3.25it/s, loss=0.117][A[A[A[A



1/3 * Epoch (train):  68% 2480/3657 [11:55<06:02,  3.25it/s, loss=0.015][A[A[A[A



1/3 * Epoch (train):  68% 2481/3657 [11:55<06:04,  3.22it/s, loss=0.015][A[A[A[A



1/3 * Epoch (train):  68% 2481/3

1/3 * Epoch (train):  69% 2522/3657 [12:07<05:41,  3.32it/s, loss=0.275][A[A[A[A



1/3 * Epoch (train):  69% 2523/3657 [12:07<05:36,  3.37it/s, loss=0.275][A[A[A[A



1/3 * Epoch (train):  69% 2523/3657 [12:08<05:36,  3.37it/s, loss=0.056][A[A[A[A



1/3 * Epoch (train):  69% 2524/3657 [12:08<05:36,  3.37it/s, loss=0.056][A[A[A[A



1/3 * Epoch (train):  69% 2524/3657 [12:08<05:36,  3.37it/s, loss=0.154][A[A[A[A



1/3 * Epoch (train):  69% 2525/3657 [12:08<05:31,  3.42it/s, loss=0.154][A[A[A[A



1/3 * Epoch (train):  69% 2525/3657 [12:08<05:31,  3.42it/s, loss=0.104][A[A[A[A



1/3 * Epoch (train):  69% 2526/3657 [12:08<05:24,  3.48it/s, loss=0.104][A[A[A[A



1/3 * Epoch (train):  69% 2526/3657 [12:08<05:24,  3.48it/s, loss=0.084][A[A[A[A



1/3 * Epoch (train):  69% 2527/3657 [12:08<05:20,  3.52it/s, loss=0.084][A[A[A[A



1/3 * Epoch (train):  69% 2527/3657 [12:09<05:20,  3.52it/s, loss=0.323][A[A[A[A



1/3 * Epoch (train):  69% 2528/3

1/3 * Epoch (train):  70% 2569/3657 [12:21<05:30,  3.29it/s, loss=0.048][A[A[A[A



1/3 * Epoch (train):  70% 2569/3657 [12:21<05:30,  3.29it/s, loss=0.182][A[A[A[A



1/3 * Epoch (train):  70% 2570/3657 [12:21<05:32,  3.27it/s, loss=0.182][A[A[A[A



1/3 * Epoch (train):  70% 2570/3657 [12:21<05:32,  3.27it/s, loss=0.175][A[A[A[A



1/3 * Epoch (train):  70% 2571/3657 [12:21<05:26,  3.33it/s, loss=0.175][A[A[A[A



1/3 * Epoch (train):  70% 2571/3657 [12:22<05:26,  3.33it/s, loss=0.112][A[A[A[A



1/3 * Epoch (train):  70% 2572/3657 [12:22<05:25,  3.33it/s, loss=0.112][A[A[A[A



1/3 * Epoch (train):  70% 2572/3657 [12:22<05:25,  3.33it/s, loss=0.071][A[A[A[A



1/3 * Epoch (train):  70% 2573/3657 [12:22<05:24,  3.34it/s, loss=0.071][A[A[A[A



1/3 * Epoch (train):  70% 2573/3657 [12:22<05:24,  3.34it/s, loss=98.087][A[A[A[A



1/3 * Epoch (train):  70% 2574/3657 [12:22<05:25,  3.32it/s, loss=98.087][A[A[A[A



1/3 * Epoch (train):  70% 2574

1/3 * Epoch (train):  72% 2615/3657 [12:35<05:14,  3.31it/s, loss=0.458][A[A[A[A



1/3 * Epoch (train):  72% 2616/3657 [12:35<05:13,  3.33it/s, loss=0.458][A[A[A[A



1/3 * Epoch (train):  72% 2616/3657 [12:35<05:13,  3.33it/s, loss=0.686][A[A[A[A



1/3 * Epoch (train):  72% 2617/3657 [12:35<05:12,  3.33it/s, loss=0.686][A[A[A[A



1/3 * Epoch (train):  72% 2617/3657 [12:35<05:12,  3.33it/s, loss=0.124][A[A[A[A



1/3 * Epoch (train):  72% 2618/3657 [12:35<05:15,  3.29it/s, loss=0.124][A[A[A[A



1/3 * Epoch (train):  72% 2618/3657 [12:36<05:15,  3.29it/s, loss=0.106][A[A[A[A



1/3 * Epoch (train):  72% 2619/3657 [12:36<05:11,  3.33it/s, loss=0.106][A[A[A[A



1/3 * Epoch (train):  72% 2619/3657 [12:36<05:11,  3.33it/s, loss=0.073][A[A[A[A



1/3 * Epoch (train):  72% 2620/3657 [12:36<05:09,  3.35it/s, loss=0.073][A[A[A[A



1/3 * Epoch (train):  72% 2620/3657 [12:36<05:09,  3.35it/s, loss=0.065][A[A[A[A



1/3 * Epoch (train):  72% 2621/3

1/3 * Epoch (train):  73% 2662/3657 [12:48<04:50,  3.43it/s, loss=0.069][A[A[A[A



1/3 * Epoch (train):  73% 2662/3657 [12:48<04:50,  3.43it/s, loss=0.074][A[A[A[A



1/3 * Epoch (train):  73% 2663/3657 [12:48<04:45,  3.48it/s, loss=0.074][A[A[A[A



1/3 * Epoch (train):  73% 2663/3657 [12:49<04:45,  3.48it/s, loss=0.180][A[A[A[A



1/3 * Epoch (train):  73% 2664/3657 [12:49<04:44,  3.50it/s, loss=0.180][A[A[A[A



1/3 * Epoch (train):  73% 2664/3657 [12:49<04:44,  3.50it/s, loss=0.197][A[A[A[A



1/3 * Epoch (train):  73% 2665/3657 [12:49<04:42,  3.51it/s, loss=0.197][A[A[A[A



1/3 * Epoch (train):  73% 2665/3657 [12:49<04:42,  3.51it/s, loss=0.145][A[A[A[A



1/3 * Epoch (train):  73% 2666/3657 [12:49<04:43,  3.50it/s, loss=0.145][A[A[A[A



1/3 * Epoch (train):  73% 2666/3657 [12:50<04:43,  3.50it/s, loss=0.170][A[A[A[A



1/3 * Epoch (train):  73% 2667/3657 [12:50<04:54,  3.37it/s, loss=0.170][A[A[A[A



1/3 * Epoch (train):  73% 2667/3

1/3 * Epoch (train):  74% 2708/3657 [13:02<04:20,  3.65it/s, loss=0.028][A[A[A[A



1/3 * Epoch (train):  74% 2709/3657 [13:02<04:17,  3.68it/s, loss=0.028][A[A[A[A



1/3 * Epoch (train):  74% 2709/3657 [13:02<04:17,  3.68it/s, loss=0.950][A[A[A[A



1/3 * Epoch (train):  74% 2710/3657 [13:02<04:22,  3.61it/s, loss=0.950][A[A[A[A



1/3 * Epoch (train):  74% 2710/3657 [13:02<04:22,  3.61it/s, loss=0.067][A[A[A[A



1/3 * Epoch (train):  74% 2711/3657 [13:02<04:24,  3.58it/s, loss=0.067][A[A[A[A



1/3 * Epoch (train):  74% 2711/3657 [13:02<04:24,  3.58it/s, loss=0.075][A[A[A[A



1/3 * Epoch (train):  74% 2712/3657 [13:02<04:23,  3.58it/s, loss=0.075][A[A[A[A



1/3 * Epoch (train):  74% 2712/3657 [13:03<04:23,  3.58it/s, loss=0.295][A[A[A[A



1/3 * Epoch (train):  74% 2713/3657 [13:03<04:28,  3.52it/s, loss=0.295][A[A[A[A



1/3 * Epoch (train):  74% 2713/3657 [13:03<04:28,  3.52it/s, loss=0.315][A[A[A[A



1/3 * Epoch (train):  74% 2714/3

1/3 * Epoch (train):  75% 2755/3657 [13:15<04:06,  3.66it/s, loss=0.200][A[A[A[A



1/3 * Epoch (train):  75% 2755/3657 [13:15<04:06,  3.66it/s, loss=0.043][A[A[A[A



1/3 * Epoch (train):  75% 2756/3657 [13:15<04:10,  3.60it/s, loss=0.043][A[A[A[A



1/3 * Epoch (train):  75% 2756/3657 [13:15<04:10,  3.60it/s, loss=0.023][A[A[A[A



1/3 * Epoch (train):  75% 2757/3657 [13:15<04:09,  3.61it/s, loss=0.023][A[A[A[A



1/3 * Epoch (train):  75% 2757/3657 [13:15<04:09,  3.61it/s, loss=0.122][A[A[A[A



1/3 * Epoch (train):  75% 2758/3657 [13:15<04:08,  3.61it/s, loss=0.122][A[A[A[A



1/3 * Epoch (train):  75% 2758/3657 [13:16<04:08,  3.61it/s, loss=0.327][A[A[A[A



1/3 * Epoch (train):  75% 2759/3657 [13:16<04:13,  3.54it/s, loss=0.327][A[A[A[A



1/3 * Epoch (train):  75% 2759/3657 [13:16<04:13,  3.54it/s, loss=0.050][A[A[A[A



1/3 * Epoch (train):  75% 2760/3657 [13:16<04:23,  3.40it/s, loss=0.050][A[A[A[A



1/3 * Epoch (train):  75% 2760/3

1/3 * Epoch (train):  77% 2801/3657 [13:28<04:01,  3.55it/s, loss=0.100][A[A[A[A



1/3 * Epoch (train):  77% 2802/3657 [13:28<04:00,  3.56it/s, loss=0.100][A[A[A[A



1/3 * Epoch (train):  77% 2802/3657 [13:28<04:00,  3.56it/s, loss=0.187][A[A[A[A



1/3 * Epoch (train):  77% 2803/3657 [13:28<03:59,  3.57it/s, loss=0.187][A[A[A[A



1/3 * Epoch (train):  77% 2803/3657 [13:29<03:59,  3.57it/s, loss=0.157][A[A[A[A



1/3 * Epoch (train):  77% 2804/3657 [13:29<03:56,  3.60it/s, loss=0.157][A[A[A[A



1/3 * Epoch (train):  77% 2804/3657 [13:29<03:56,  3.60it/s, loss=0.291][A[A[A[A



1/3 * Epoch (train):  77% 2805/3657 [13:29<03:55,  3.62it/s, loss=0.291][A[A[A[A



1/3 * Epoch (train):  77% 2805/3657 [13:29<03:55,  3.62it/s, loss=0.104][A[A[A[A



1/3 * Epoch (train):  77% 2806/3657 [13:29<03:55,  3.61it/s, loss=0.104][A[A[A[A



1/3 * Epoch (train):  77% 2806/3657 [13:29<03:55,  3.61it/s, loss=0.190][A[A[A[A



1/3 * Epoch (train):  77% 2807/3

1/3 * Epoch (train):  78% 2848/3657 [13:41<03:40,  3.67it/s, loss=0.202][A[A[A[A



1/3 * Epoch (train):  78% 2848/3657 [13:41<03:40,  3.67it/s, loss=0.097][A[A[A[A



1/3 * Epoch (train):  78% 2849/3657 [13:41<03:40,  3.66it/s, loss=0.097][A[A[A[A



1/3 * Epoch (train):  78% 2849/3657 [13:42<03:40,  3.66it/s, loss=0.129][A[A[A[A



1/3 * Epoch (train):  78% 2850/3657 [13:42<03:41,  3.64it/s, loss=0.129][A[A[A[A



1/3 * Epoch (train):  78% 2850/3657 [13:42<03:41,  3.64it/s, loss=0.188][A[A[A[A



1/3 * Epoch (train):  78% 2851/3657 [13:42<03:47,  3.55it/s, loss=0.188][A[A[A[A



1/3 * Epoch (train):  78% 2851/3657 [13:42<03:47,  3.55it/s, loss=0.454][A[A[A[A



1/3 * Epoch (train):  78% 2852/3657 [13:42<03:45,  3.57it/s, loss=0.454][A[A[A[A



1/3 * Epoch (train):  78% 2852/3657 [13:43<03:45,  3.57it/s, loss=0.065][A[A[A[A



1/3 * Epoch (train):  78% 2853/3657 [13:43<03:45,  3.57it/s, loss=0.065][A[A[A[A



1/3 * Epoch (train):  78% 2853/3

1/3 * Epoch (train):  79% 2894/3657 [13:54<03:35,  3.54it/s, loss=0.155][A[A[A[A



1/3 * Epoch (train):  79% 2895/3657 [13:54<03:39,  3.47it/s, loss=0.155][A[A[A[A



1/3 * Epoch (train):  79% 2895/3657 [13:55<03:39,  3.47it/s, loss=0.091][A[A[A[A



1/3 * Epoch (train):  79% 2896/3657 [13:55<03:42,  3.42it/s, loss=0.091][A[A[A[A



1/3 * Epoch (train):  79% 2896/3657 [13:55<03:42,  3.42it/s, loss=0.183][A[A[A[A



1/3 * Epoch (train):  79% 2897/3657 [13:55<03:48,  3.33it/s, loss=0.183][A[A[A[A



1/3 * Epoch (train):  79% 2897/3657 [13:55<03:48,  3.33it/s, loss=0.212][A[A[A[A



1/3 * Epoch (train):  79% 2898/3657 [13:55<03:41,  3.43it/s, loss=0.212][A[A[A[A



1/3 * Epoch (train):  79% 2898/3657 [13:56<03:41,  3.43it/s, loss=0.138][A[A[A[A



1/3 * Epoch (train):  79% 2899/3657 [13:56<03:38,  3.46it/s, loss=0.138][A[A[A[A



1/3 * Epoch (train):  79% 2899/3657 [13:56<03:38,  3.46it/s, loss=1.084][A[A[A[A



1/3 * Epoch (train):  79% 2900/3

1/3 * Epoch (train):  80% 2941/3657 [14:07<03:20,  3.58it/s, loss=0.063][A[A[A[A



1/3 * Epoch (train):  80% 2941/3657 [14:07<03:20,  3.58it/s, loss=0.135][A[A[A[A



1/3 * Epoch (train):  80% 2942/3657 [14:07<03:17,  3.61it/s, loss=0.135][A[A[A[A



1/3 * Epoch (train):  80% 2942/3657 [14:08<03:17,  3.61it/s, loss=0.086][A[A[A[A



1/3 * Epoch (train):  80% 2943/3657 [14:08<03:17,  3.62it/s, loss=0.086][A[A[A[A



1/3 * Epoch (train):  80% 2943/3657 [14:08<03:17,  3.62it/s, loss=0.052][A[A[A[A



1/3 * Epoch (train):  81% 2944/3657 [14:08<03:13,  3.68it/s, loss=0.052][A[A[A[A



1/3 * Epoch (train):  81% 2944/3657 [14:08<03:13,  3.68it/s, loss=0.122][A[A[A[A



1/3 * Epoch (train):  81% 2945/3657 [14:08<03:15,  3.65it/s, loss=0.122][A[A[A[A



1/3 * Epoch (train):  81% 2945/3657 [14:08<03:15,  3.65it/s, loss=0.205][A[A[A[A



1/3 * Epoch (train):  81% 2946/3657 [14:08<03:17,  3.59it/s, loss=0.205][A[A[A[A



1/3 * Epoch (train):  81% 2946/3

1/3 * Epoch (train):  82% 2987/3657 [14:20<02:58,  3.76it/s, loss=0.094][A[A[A[A



1/3 * Epoch (train):  82% 2988/3657 [14:20<02:56,  3.80it/s, loss=0.094][A[A[A[A



1/3 * Epoch (train):  82% 2988/3657 [14:20<02:56,  3.80it/s, loss=0.236][A[A[A[A



1/3 * Epoch (train):  82% 2989/3657 [14:20<02:55,  3.80it/s, loss=0.236][A[A[A[A



1/3 * Epoch (train):  82% 2989/3657 [14:21<02:55,  3.80it/s, loss=0.118][A[A[A[A



1/3 * Epoch (train):  82% 2990/3657 [14:21<02:54,  3.82it/s, loss=0.118][A[A[A[A



1/3 * Epoch (train):  82% 2990/3657 [14:21<02:54,  3.82it/s, loss=0.078][A[A[A[A



1/3 * Epoch (train):  82% 2991/3657 [14:21<02:54,  3.81it/s, loss=0.078][A[A[A[A



1/3 * Epoch (train):  82% 2991/3657 [14:21<02:54,  3.81it/s, loss=0.149][A[A[A[A



1/3 * Epoch (train):  82% 2992/3657 [14:21<02:59,  3.70it/s, loss=0.149][A[A[A[A



1/3 * Epoch (train):  82% 2992/3657 [14:21<02:59,  3.70it/s, loss=0.115][A[A[A[A



1/3 * Epoch (train):  82% 2993/3

1/3 * Epoch (train):  83% 3034/3657 [14:33<02:58,  3.48it/s, loss=0.368][A[A[A[A



1/3 * Epoch (train):  83% 3034/3657 [14:33<02:58,  3.48it/s, loss=0.458][A[A[A[A



1/3 * Epoch (train):  83% 3035/3657 [14:33<03:00,  3.45it/s, loss=0.458][A[A[A[A



1/3 * Epoch (train):  83% 3035/3657 [14:34<03:00,  3.45it/s, loss=0.146][A[A[A[A



1/3 * Epoch (train):  83% 3036/3657 [14:34<02:59,  3.46it/s, loss=0.146][A[A[A[A



1/3 * Epoch (train):  83% 3036/3657 [14:34<02:59,  3.46it/s, loss=0.131][A[A[A[A



1/3 * Epoch (train):  83% 3037/3657 [14:34<03:01,  3.42it/s, loss=0.131][A[A[A[A



1/3 * Epoch (train):  83% 3037/3657 [14:34<03:01,  3.42it/s, loss=0.193][A[A[A[A



1/3 * Epoch (train):  83% 3038/3657 [14:34<02:58,  3.47it/s, loss=0.193][A[A[A[A



1/3 * Epoch (train):  83% 3038/3657 [14:34<02:58,  3.47it/s, loss=0.228][A[A[A[A



1/3 * Epoch (train):  83% 3039/3657 [14:34<02:57,  3.48it/s, loss=0.228][A[A[A[A



1/3 * Epoch (train):  83% 3039/3

1/3 * Epoch (train):  84% 3080/3657 [14:46<02:41,  3.56it/s, loss=0.094][A[A[A[A



1/3 * Epoch (train):  84% 3081/3657 [14:46<02:41,  3.56it/s, loss=0.094][A[A[A[A



1/3 * Epoch (train):  84% 3081/3657 [14:47<02:41,  3.56it/s, loss=0.145][A[A[A[A



1/3 * Epoch (train):  84% 3082/3657 [14:47<02:40,  3.57it/s, loss=0.145][A[A[A[A



1/3 * Epoch (train):  84% 3082/3657 [14:47<02:40,  3.57it/s, loss=0.039][A[A[A[A



1/3 * Epoch (train):  84% 3083/3657 [14:47<02:37,  3.65it/s, loss=0.039][A[A[A[A



1/3 * Epoch (train):  84% 3083/3657 [14:47<02:37,  3.65it/s, loss=0.039][A[A[A[A



1/3 * Epoch (train):  84% 3084/3657 [14:47<02:35,  3.69it/s, loss=0.039][A[A[A[A



1/3 * Epoch (train):  84% 3084/3657 [14:47<02:35,  3.69it/s, loss=0.412][A[A[A[A



1/3 * Epoch (train):  84% 3085/3657 [14:47<02:34,  3.70it/s, loss=0.412][A[A[A[A



1/3 * Epoch (train):  84% 3085/3657 [14:48<02:34,  3.70it/s, loss=0.123][A[A[A[A



1/3 * Epoch (train):  84% 3086/3

1/3 * Epoch (train):  86% 3127/3657 [14:59<02:30,  3.52it/s, loss=0.091][A[A[A[A



1/3 * Epoch (train):  86% 3127/3657 [15:00<02:30,  3.52it/s, loss=0.094][A[A[A[A



1/3 * Epoch (train):  86% 3128/3657 [15:00<02:31,  3.49it/s, loss=0.094][A[A[A[A



1/3 * Epoch (train):  86% 3128/3657 [15:00<02:31,  3.49it/s, loss=0.063][A[A[A[A



1/3 * Epoch (train):  86% 3129/3657 [15:00<02:32,  3.47it/s, loss=0.063][A[A[A[A



1/3 * Epoch (train):  86% 3129/3657 [15:00<02:32,  3.47it/s, loss=0.178][A[A[A[A



1/3 * Epoch (train):  86% 3130/3657 [15:00<02:30,  3.49it/s, loss=0.178][A[A[A[A



1/3 * Epoch (train):  86% 3130/3657 [15:00<02:30,  3.49it/s, loss=0.115][A[A[A[A



1/3 * Epoch (train):  86% 3131/3657 [15:00<02:29,  3.53it/s, loss=0.115][A[A[A[A



1/3 * Epoch (train):  86% 3131/3657 [15:01<02:29,  3.53it/s, loss=0.175][A[A[A[A



1/3 * Epoch (train):  86% 3132/3657 [15:01<02:25,  3.61it/s, loss=0.175][A[A[A[A



1/3 * Epoch (train):  86% 3132/3

1/3 * Epoch (train):  87% 3173/3657 [15:13<02:12,  3.65it/s, loss=0.057][A[A[A[A



1/3 * Epoch (train):  87% 3174/3657 [15:13<02:13,  3.61it/s, loss=0.057][A[A[A[A



1/3 * Epoch (train):  87% 3174/3657 [15:13<02:13,  3.61it/s, loss=0.180][A[A[A[A



1/3 * Epoch (train):  87% 3175/3657 [15:13<02:18,  3.47it/s, loss=0.180][A[A[A[A



1/3 * Epoch (train):  87% 3175/3657 [15:13<02:18,  3.47it/s, loss=0.143][A[A[A[A



1/3 * Epoch (train):  87% 3176/3657 [15:13<02:20,  3.42it/s, loss=0.143][A[A[A[A



1/3 * Epoch (train):  87% 3176/3657 [15:13<02:20,  3.42it/s, loss=0.110][A[A[A[A



1/3 * Epoch (train):  87% 3177/3657 [15:13<02:20,  3.41it/s, loss=0.110][A[A[A[A



1/3 * Epoch (train):  87% 3177/3657 [15:14<02:20,  3.41it/s, loss=0.044][A[A[A[A



1/3 * Epoch (train):  87% 3178/3657 [15:14<02:21,  3.40it/s, loss=0.044][A[A[A[A



1/3 * Epoch (train):  87% 3178/3657 [15:14<02:21,  3.40it/s, loss=0.107][A[A[A[A



1/3 * Epoch (train):  87% 3179/3

1/3 * Epoch (train):  88% 3220/3657 [15:25<01:55,  3.79it/s, loss=0.103][A[A[A[A



1/3 * Epoch (train):  88% 3220/3657 [15:26<01:55,  3.79it/s, loss=0.084][A[A[A[A



1/3 * Epoch (train):  88% 3221/3657 [15:26<01:56,  3.73it/s, loss=0.084][A[A[A[A



1/3 * Epoch (train):  88% 3221/3657 [15:26<01:56,  3.73it/s, loss=0.597][A[A[A[A



1/3 * Epoch (train):  88% 3222/3657 [15:26<01:58,  3.66it/s, loss=0.597][A[A[A[A



1/3 * Epoch (train):  88% 3222/3657 [15:26<01:58,  3.66it/s, loss=0.797][A[A[A[A



1/3 * Epoch (train):  88% 3223/3657 [15:26<01:58,  3.65it/s, loss=0.797][A[A[A[A



1/3 * Epoch (train):  88% 3223/3657 [15:26<01:58,  3.65it/s, loss=0.045][A[A[A[A



1/3 * Epoch (train):  88% 3224/3657 [15:26<01:59,  3.64it/s, loss=0.045][A[A[A[A



1/3 * Epoch (train):  88% 3224/3657 [15:27<01:59,  3.64it/s, loss=0.058][A[A[A[A



1/3 * Epoch (train):  88% 3225/3657 [15:27<01:59,  3.61it/s, loss=0.058][A[A[A[A



1/3 * Epoch (train):  88% 3225/3

1/3 * Epoch (train):  89% 3266/3657 [15:39<01:48,  3.62it/s, loss=0.183][A[A[A[A



1/3 * Epoch (train):  89% 3267/3657 [15:39<01:49,  3.56it/s, loss=0.183][A[A[A[A



1/3 * Epoch (train):  89% 3267/3657 [15:39<01:49,  3.56it/s, loss=0.056][A[A[A[A



1/3 * Epoch (train):  89% 3268/3657 [15:39<01:49,  3.55it/s, loss=0.056][A[A[A[A



1/3 * Epoch (train):  89% 3268/3657 [15:39<01:49,  3.55it/s, loss=0.083][A[A[A[A



1/3 * Epoch (train):  89% 3269/3657 [15:39<01:47,  3.59it/s, loss=0.083][A[A[A[A



1/3 * Epoch (train):  89% 3269/3657 [15:39<01:47,  3.59it/s, loss=0.321][A[A[A[A



1/3 * Epoch (train):  89% 3270/3657 [15:39<01:45,  3.68it/s, loss=0.321][A[A[A[A



1/3 * Epoch (train):  89% 3270/3657 [15:40<01:45,  3.68it/s, loss=2.228][A[A[A[A



1/3 * Epoch (train):  89% 3271/3657 [15:40<01:43,  3.73it/s, loss=2.228][A[A[A[A



1/3 * Epoch (train):  89% 3271/3657 [15:40<01:43,  3.73it/s, loss=0.056][A[A[A[A



1/3 * Epoch (train):  89% 3272/3

1/3 * Epoch (train):  91% 3313/3657 [15:51<01:29,  3.86it/s, loss=0.561][A[A[A[A



1/3 * Epoch (train):  91% 3313/3657 [15:51<01:29,  3.86it/s, loss=0.109][A[A[A[A



1/3 * Epoch (train):  91% 3314/3657 [15:51<01:29,  3.84it/s, loss=0.109][A[A[A[A



1/3 * Epoch (train):  91% 3314/3657 [15:51<01:29,  3.84it/s, loss=0.052][A[A[A[A



1/3 * Epoch (train):  91% 3315/3657 [15:51<01:31,  3.72it/s, loss=0.052][A[A[A[A



1/3 * Epoch (train):  91% 3315/3657 [15:52<01:31,  3.72it/s, loss=0.108][A[A[A[A



1/3 * Epoch (train):  91% 3316/3657 [15:52<01:32,  3.68it/s, loss=0.108][A[A[A[A



1/3 * Epoch (train):  91% 3316/3657 [15:52<01:32,  3.68it/s, loss=0.105][A[A[A[A



1/3 * Epoch (train):  91% 3317/3657 [15:52<01:34,  3.60it/s, loss=0.105][A[A[A[A



1/3 * Epoch (train):  91% 3317/3657 [15:52<01:34,  3.60it/s, loss=0.149][A[A[A[A



1/3 * Epoch (train):  91% 3318/3657 [15:52<01:35,  3.56it/s, loss=0.149][A[A[A[A



1/3 * Epoch (train):  91% 3318/3

1/3 * Epoch (train):  92% 3359/3657 [16:04<01:24,  3.52it/s, loss=0.071][A[A[A[A



1/3 * Epoch (train):  92% 3360/3657 [16:04<01:24,  3.53it/s, loss=0.071][A[A[A[A



1/3 * Epoch (train):  92% 3360/3657 [16:04<01:24,  3.53it/s, loss=0.185][A[A[A[A



1/3 * Epoch (train):  92% 3361/3657 [16:04<01:23,  3.54it/s, loss=0.185][A[A[A[A



1/3 * Epoch (train):  92% 3361/3657 [16:04<01:23,  3.54it/s, loss=2.356][A[A[A[A



1/3 * Epoch (train):  92% 3362/3657 [16:04<01:26,  3.43it/s, loss=2.356][A[A[A[A



1/3 * Epoch (train):  92% 3362/3657 [16:05<01:26,  3.43it/s, loss=0.167][A[A[A[A



1/3 * Epoch (train):  92% 3363/3657 [16:05<01:26,  3.42it/s, loss=0.167][A[A[A[A



1/3 * Epoch (train):  92% 3363/3657 [16:05<01:26,  3.42it/s, loss=0.104][A[A[A[A



1/3 * Epoch (train):  92% 3364/3657 [16:05<01:23,  3.50it/s, loss=0.104][A[A[A[A



1/3 * Epoch (train):  92% 3364/3657 [16:05<01:23,  3.50it/s, loss=0.050][A[A[A[A



1/3 * Epoch (train):  92% 3365/3

1/3 * Epoch (train):  93% 3406/3657 [16:17<01:10,  3.56it/s, loss=0.063][A[A[A[A



1/3 * Epoch (train):  93% 3406/3657 [16:17<01:10,  3.56it/s, loss=0.046][A[A[A[A



1/3 * Epoch (train):  93% 3407/3657 [16:17<01:09,  3.59it/s, loss=0.046][A[A[A[A



1/3 * Epoch (train):  93% 3407/3657 [16:17<01:09,  3.59it/s, loss=0.039][A[A[A[A



1/3 * Epoch (train):  93% 3408/3657 [16:17<01:08,  3.62it/s, loss=0.039][A[A[A[A



1/3 * Epoch (train):  93% 3408/3657 [16:17<01:08,  3.62it/s, loss=0.085][A[A[A[A



1/3 * Epoch (train):  93% 3409/3657 [16:17<01:08,  3.63it/s, loss=0.085][A[A[A[A



1/3 * Epoch (train):  93% 3409/3657 [16:18<01:08,  3.63it/s, loss=0.517][A[A[A[A



1/3 * Epoch (train):  93% 3410/3657 [16:18<01:09,  3.54it/s, loss=0.517][A[A[A[A



1/3 * Epoch (train):  93% 3410/3657 [16:18<01:09,  3.54it/s, loss=0.095][A[A[A[A



1/3 * Epoch (train):  93% 3411/3657 [16:18<01:09,  3.54it/s, loss=0.095][A[A[A[A



1/3 * Epoch (train):  93% 3411/3

1/3 * Epoch (train):  94% 3452/3657 [16:30<01:00,  3.37it/s, loss=0.209][A[A[A[A



1/3 * Epoch (train):  94% 3453/3657 [16:30<01:01,  3.29it/s, loss=0.209][A[A[A[A



1/3 * Epoch (train):  94% 3453/3657 [16:30<01:01,  3.29it/s, loss=0.187][A[A[A[A



1/3 * Epoch (train):  94% 3454/3657 [16:30<01:02,  3.27it/s, loss=0.187][A[A[A[A



1/3 * Epoch (train):  94% 3454/3657 [16:31<01:02,  3.27it/s, loss=0.059][A[A[A[A



1/3 * Epoch (train):  94% 3455/3657 [16:31<01:02,  3.26it/s, loss=0.059][A[A[A[A



1/3 * Epoch (train):  94% 3455/3657 [16:31<01:02,  3.26it/s, loss=0.056][A[A[A[A



1/3 * Epoch (train):  95% 3456/3657 [16:31<01:02,  3.24it/s, loss=0.056][A[A[A[A



1/3 * Epoch (train):  95% 3456/3657 [16:31<01:02,  3.24it/s, loss=0.141][A[A[A[A



1/3 * Epoch (train):  95% 3457/3657 [16:31<01:00,  3.29it/s, loss=0.141][A[A[A[A



1/3 * Epoch (train):  95% 3457/3657 [16:31<01:00,  3.29it/s, loss=0.219][A[A[A[A



1/3 * Epoch (train):  95% 3458/3

1/3 * Epoch (train):  96% 3499/3657 [16:43<00:42,  3.68it/s, loss=0.087][A[A[A[A



1/3 * Epoch (train):  96% 3499/3657 [16:43<00:42,  3.68it/s, loss=0.128][A[A[A[A



1/3 * Epoch (train):  96% 3500/3657 [16:43<00:44,  3.55it/s, loss=0.128][A[A[A[A



1/3 * Epoch (train):  96% 3500/3657 [16:43<00:44,  3.55it/s, loss=0.044][A[A[A[A



1/3 * Epoch (train):  96% 3501/3657 [16:43<00:43,  3.60it/s, loss=0.044][A[A[A[A



1/3 * Epoch (train):  96% 3501/3657 [16:43<00:43,  3.60it/s, loss=0.551][A[A[A[A



1/3 * Epoch (train):  96% 3502/3657 [16:43<00:42,  3.62it/s, loss=0.551][A[A[A[A



1/3 * Epoch (train):  96% 3502/3657 [16:44<00:42,  3.62it/s, loss=0.162][A[A[A[A



1/3 * Epoch (train):  96% 3503/3657 [16:44<00:42,  3.64it/s, loss=0.162][A[A[A[A



1/3 * Epoch (train):  96% 3503/3657 [16:44<00:42,  3.64it/s, loss=0.224][A[A[A[A



1/3 * Epoch (train):  96% 3504/3657 [16:44<00:42,  3.60it/s, loss=0.224][A[A[A[A



1/3 * Epoch (train):  96% 3504/3

1/3 * Epoch (train):  97% 3545/3657 [16:56<00:32,  3.44it/s, loss=3.858][A[A[A[A



1/3 * Epoch (train):  97% 3546/3657 [16:56<00:34,  3.22it/s, loss=3.858][A[A[A[A



1/3 * Epoch (train):  97% 3546/3657 [16:56<00:34,  3.22it/s, loss=0.042][A[A[A[A



1/3 * Epoch (train):  97% 3547/3657 [16:56<00:34,  3.19it/s, loss=0.042][A[A[A[A



1/3 * Epoch (train):  97% 3547/3657 [16:57<00:34,  3.19it/s, loss=0.093][A[A[A[A



1/3 * Epoch (train):  97% 3548/3657 [16:57<00:33,  3.27it/s, loss=0.093][A[A[A[A



1/3 * Epoch (train):  97% 3548/3657 [16:57<00:33,  3.27it/s, loss=0.044][A[A[A[A



1/3 * Epoch (train):  97% 3549/3657 [16:57<00:33,  3.23it/s, loss=0.044][A[A[A[A



1/3 * Epoch (train):  97% 3549/3657 [16:57<00:33,  3.23it/s, loss=0.293][A[A[A[A



1/3 * Epoch (train):  97% 3550/3657 [16:57<00:31,  3.36it/s, loss=0.293][A[A[A[A



1/3 * Epoch (train):  97% 3550/3657 [16:58<00:31,  3.36it/s, loss=0.047][A[A[A[A



1/3 * Epoch (train):  97% 3551/3

1/3 * Epoch (train):  98% 3592/3657 [17:10<00:20,  3.23it/s, loss=0.082][A[A[A[A



1/3 * Epoch (train):  98% 3592/3657 [17:10<00:20,  3.23it/s, loss=0.137][A[A[A[A



1/3 * Epoch (train):  98% 3593/3657 [17:10<00:19,  3.22it/s, loss=0.137][A[A[A[A



1/3 * Epoch (train):  98% 3593/3657 [17:10<00:19,  3.22it/s, loss=0.337][A[A[A[A



1/3 * Epoch (train):  98% 3594/3657 [17:10<00:19,  3.23it/s, loss=0.337][A[A[A[A



1/3 * Epoch (train):  98% 3594/3657 [17:10<00:19,  3.23it/s, loss=0.078][A[A[A[A



1/3 * Epoch (train):  98% 3595/3657 [17:10<00:19,  3.20it/s, loss=0.078][A[A[A[A



1/3 * Epoch (train):  98% 3595/3657 [17:11<00:19,  3.20it/s, loss=0.105][A[A[A[A



1/3 * Epoch (train):  98% 3596/3657 [17:11<00:18,  3.22it/s, loss=0.105][A[A[A[A



1/3 * Epoch (train):  98% 3596/3657 [17:11<00:18,  3.22it/s, loss=0.327][A[A[A[A



1/3 * Epoch (train):  98% 3597/3657 [17:11<00:18,  3.21it/s, loss=0.327][A[A[A[A



1/3 * Epoch (train):  98% 3597/3

1/3 * Epoch (train):  99% 3638/3657 [17:24<00:05,  3.28it/s, loss=0.093][A[A[A[A



1/3 * Epoch (train): 100% 3639/3657 [17:24<00:05,  3.23it/s, loss=0.093][A[A[A[A



1/3 * Epoch (train): 100% 3639/3657 [17:24<00:05,  3.23it/s, loss=0.077][A[A[A[A



1/3 * Epoch (train): 100% 3640/3657 [17:24<00:05,  3.27it/s, loss=0.077][A[A[A[A



1/3 * Epoch (train): 100% 3640/3657 [17:24<00:05,  3.27it/s, loss=0.161][A[A[A[A



1/3 * Epoch (train): 100% 3641/3657 [17:24<00:04,  3.34it/s, loss=0.161][A[A[A[A



1/3 * Epoch (train): 100% 3641/3657 [17:25<00:04,  3.34it/s, loss=0.240][A[A[A[A



1/3 * Epoch (train): 100% 3642/3657 [17:25<00:04,  3.30it/s, loss=0.240][A[A[A[A



1/3 * Epoch (train): 100% 3642/3657 [17:25<00:04,  3.30it/s, loss=2.059][A[A[A[A



1/3 * Epoch (train): 100% 3643/3657 [17:25<00:04,  3.33it/s, loss=2.059][A[A[A[A



1/3 * Epoch (train): 100% 3643/3657 [17:25<00:04,  3.33it/s, loss=0.156][A[A[A[A



1/3 * Epoch (train): 100% 3644/3

1/3 * Epoch (valid):  14% 37/262 [00:02<00:13, 17.30it/s, loss=0.076][A[A[A[A



1/3 * Epoch (valid):  14% 37/262 [00:02<00:13, 17.30it/s, loss=0.161][A[A[A[A



1/3 * Epoch (valid):  15% 38/262 [00:02<00:12, 17.30it/s, loss=0.066][A[A[A[A



1/3 * Epoch (valid):  15% 39/262 [00:02<00:13, 17.05it/s, loss=0.066][A[A[A[A



1/3 * Epoch (valid):  15% 39/262 [00:02<00:13, 17.05it/s, loss=0.189][A[A[A[A



1/3 * Epoch (valid):  15% 40/262 [00:02<00:13, 17.05it/s, loss=0.063][A[A[A[A



1/3 * Epoch (valid):  16% 41/262 [00:02<00:12, 17.66it/s, loss=0.063][A[A[A[A



1/3 * Epoch (valid):  16% 41/262 [00:02<00:12, 17.66it/s, loss=0.047][A[A[A[A



1/3 * Epoch (valid):  16% 42/262 [00:02<00:12, 17.66it/s, loss=0.149][A[A[A[A



1/3 * Epoch (valid):  16% 43/262 [00:02<00:12, 18.12it/s, loss=0.149][A[A[A[A



1/3 * Epoch (valid):  16% 43/262 [00:02<00:12, 18.12it/s, loss=0.122][A[A[A[A



1/3 * Epoch (valid):  17% 44/262 [00:03<00:12, 18.12it/s, loss=0.

1/3 * Epoch (valid):  39% 101/262 [00:06<00:08, 18.92it/s, loss=0.128][A[A[A[A



1/3 * Epoch (valid):  39% 101/262 [00:06<00:08, 18.92it/s, loss=0.293][A[A[A[A



1/3 * Epoch (valid):  39% 102/262 [00:06<00:08, 18.92it/s, loss=0.314][A[A[A[A



1/3 * Epoch (valid):  39% 103/262 [00:06<00:08, 17.79it/s, loss=0.314][A[A[A[A



1/3 * Epoch (valid):  39% 103/262 [00:06<00:08, 17.79it/s, loss=0.216][A[A[A[A



1/3 * Epoch (valid):  40% 104/262 [00:06<00:08, 17.79it/s, loss=0.191][A[A[A[A



1/3 * Epoch (valid):  40% 105/262 [00:06<00:09, 17.25it/s, loss=0.191][A[A[A[A



1/3 * Epoch (valid):  40% 105/262 [00:06<00:09, 17.25it/s, loss=0.258][A[A[A[A



1/3 * Epoch (valid):  40% 106/262 [00:06<00:09, 17.25it/s, loss=0.887][A[A[A[A



1/3 * Epoch (valid):  41% 107/262 [00:06<00:08, 17.75it/s, loss=0.887][A[A[A[A



1/3 * Epoch (valid):  41% 107/262 [00:06<00:08, 17.75it/s, loss=0.064][A[A[A[A



1/3 * Epoch (valid):  41% 108/262 [00:06<00:08, 17.75i

1/3 * Epoch (valid):  63% 164/262 [00:09<00:05, 19.56it/s, loss=0.081][A[A[A[A



1/3 * Epoch (valid):  63% 165/262 [00:09<00:05, 18.17it/s, loss=0.081][A[A[A[A



1/3 * Epoch (valid):  63% 165/262 [00:09<00:05, 18.17it/s, loss=0.243][A[A[A[A



1/3 * Epoch (valid):  63% 166/262 [00:09<00:05, 18.17it/s, loss=0.059][A[A[A[A



1/3 * Epoch (valid):  64% 167/262 [00:09<00:05, 18.59it/s, loss=0.059][A[A[A[A



1/3 * Epoch (valid):  64% 167/262 [00:09<00:05, 18.59it/s, loss=0.037][A[A[A[A



1/3 * Epoch (valid):  64% 168/262 [00:09<00:05, 18.59it/s, loss=0.148][A[A[A[A



1/3 * Epoch (valid):  65% 169/262 [00:09<00:04, 18.98it/s, loss=0.148][A[A[A[A



1/3 * Epoch (valid):  65% 169/262 [00:09<00:04, 18.98it/s, loss=0.064][A[A[A[A



1/3 * Epoch (valid):  65% 170/262 [00:09<00:04, 18.98it/s, loss=0.224][A[A[A[A



1/3 * Epoch (valid):  65% 171/262 [00:09<00:04, 19.26it/s, loss=0.224][A[A[A[A



1/3 * Epoch (valid):  65% 171/262 [00:09<00:04, 19.26i

1/3 * Epoch (valid):  87% 228/262 [00:12<00:01, 19.05it/s, loss=0.391][A[A[A[A



1/3 * Epoch (valid):  87% 229/262 [00:12<00:01, 19.05it/s, loss=0.228][A[A[A[A



1/3 * Epoch (valid):  88% 230/262 [00:12<00:01, 18.94it/s, loss=0.228][A[A[A[A



1/3 * Epoch (valid):  88% 230/262 [00:13<00:01, 18.94it/s, loss=0.289][A[A[A[A



1/3 * Epoch (valid):  88% 231/262 [00:13<00:01, 18.94it/s, loss=0.090][A[A[A[A



1/3 * Epoch (valid):  89% 232/262 [00:13<00:01, 19.23it/s, loss=0.090][A[A[A[A



1/3 * Epoch (valid):  89% 232/262 [00:13<00:01, 19.23it/s, loss=0.037][A[A[A[A



1/3 * Epoch (valid):  89% 233/262 [00:13<00:01, 19.23it/s, loss=0.251][A[A[A[A



1/3 * Epoch (valid):  89% 234/262 [00:13<00:01, 19.23it/s, loss=0.211][A[A[A[A



1/3 * Epoch (valid):  90% 235/262 [00:13<00:01, 19.50it/s, loss=0.211][A[A[A[A



1/3 * Epoch (valid):  90% 235/262 [00:13<00:01, 19.50it/s, loss=0.263][A[A[A[A



1/3 * Epoch (valid):  90% 236/262 [00:13<00:01, 19.50i

2/3 * Epoch (train):   1% 20/3657 [00:06<17:56,  3.38it/s, loss=0.261][A[A[A[A



2/3 * Epoch (train):   1% 20/3657 [00:07<17:56,  3.38it/s, loss=0.059][A[A[A[A



2/3 * Epoch (train):   1% 21/3657 [00:07<18:02,  3.36it/s, loss=0.059][A[A[A[A



2/3 * Epoch (train):   1% 21/3657 [00:07<18:02,  3.36it/s, loss=0.436][A[A[A[A



2/3 * Epoch (train):   1% 22/3657 [00:07<18:09,  3.34it/s, loss=0.436][A[A[A[A



2/3 * Epoch (train):   1% 22/3657 [00:07<18:09,  3.34it/s, loss=0.174][A[A[A[A



2/3 * Epoch (train):   1% 23/3657 [00:07<18:14,  3.32it/s, loss=0.174][A[A[A[A



2/3 * Epoch (train):   1% 23/3657 [00:07<18:14,  3.32it/s, loss=0.141][A[A[A[A



2/3 * Epoch (train):   1% 24/3657 [00:07<18:26,  3.28it/s, loss=0.141][A[A[A[A



2/3 * Epoch (train):   1% 24/3657 [00:08<18:26,  3.28it/s, loss=0.682][A[A[A[A



2/3 * Epoch (train):   1% 25/3657 [00:08<18:16,  3.31it/s, loss=0.682][A[A[A[A



2/3 * Epoch (train):   1% 25/3657 [00:08<18:16,  3.31i

2/3 * Epoch (train):   2% 67/3657 [00:21<18:49,  3.18it/s, loss=0.067][A[A[A[A



2/3 * Epoch (train):   2% 68/3657 [00:21<18:32,  3.23it/s, loss=0.067][A[A[A[A



2/3 * Epoch (train):   2% 68/3657 [00:21<18:32,  3.23it/s, loss=0.114][A[A[A[A



2/3 * Epoch (train):   2% 69/3657 [00:21<18:28,  3.24it/s, loss=0.114][A[A[A[A



2/3 * Epoch (train):   2% 69/3657 [00:21<18:28,  3.24it/s, loss=0.082][A[A[A[A



2/3 * Epoch (train):   2% 70/3657 [00:21<18:24,  3.25it/s, loss=0.082][A[A[A[A



2/3 * Epoch (train):   2% 70/3657 [00:22<18:24,  3.25it/s, loss=1.172][A[A[A[A



2/3 * Epoch (train):   2% 71/3657 [00:22<18:25,  3.24it/s, loss=1.172][A[A[A[A



2/3 * Epoch (train):   2% 71/3657 [00:22<18:25,  3.24it/s, loss=0.180][A[A[A[A



2/3 * Epoch (train):   2% 72/3657 [00:22<18:40,  3.20it/s, loss=0.180][A[A[A[A



2/3 * Epoch (train):   2% 72/3657 [00:22<18:40,  3.20it/s, loss=0.087][A[A[A[A



2/3 * Epoch (train):   2% 73/3657 [00:22<18:41,  3.20i

2/3 * Epoch (train):   3% 114/3657 [00:35<18:30,  3.19it/s, loss=0.149][A[A[A[A



2/3 * Epoch (train):   3% 115/3657 [00:35<18:13,  3.24it/s, loss=0.149][A[A[A[A



2/3 * Epoch (train):   3% 115/3657 [00:35<18:13,  3.24it/s, loss=0.838][A[A[A[A



2/3 * Epoch (train):   3% 116/3657 [00:35<17:50,  3.31it/s, loss=0.838][A[A[A[A



2/3 * Epoch (train):   3% 116/3657 [00:36<17:50,  3.31it/s, loss=0.050][A[A[A[A



2/3 * Epoch (train):   3% 117/3657 [00:36<17:44,  3.33it/s, loss=0.050][A[A[A[A



2/3 * Epoch (train):   3% 117/3657 [00:36<17:44,  3.33it/s, loss=0.146][A[A[A[A



2/3 * Epoch (train):   3% 118/3657 [00:36<17:38,  3.34it/s, loss=0.146][A[A[A[A



2/3 * Epoch (train):   3% 118/3657 [00:36<17:38,  3.34it/s, loss=0.107][A[A[A[A



2/3 * Epoch (train):   3% 119/3657 [00:36<17:32,  3.36it/s, loss=0.107][A[A[A[A



2/3 * Epoch (train):   3% 119/3657 [00:36<17:32,  3.36it/s, loss=0.198][A[A[A[A



2/3 * Epoch (train):   3% 120/3657 [00:36<1

2/3 * Epoch (train):   4% 161/3657 [00:49<17:45,  3.28it/s, loss=0.053][A[A[A[A



2/3 * Epoch (train):   4% 162/3657 [00:49<17:31,  3.32it/s, loss=0.053][A[A[A[A



2/3 * Epoch (train):   4% 162/3657 [00:49<17:31,  3.32it/s, loss=0.134][A[A[A[A



2/3 * Epoch (train):   4% 163/3657 [00:49<17:18,  3.36it/s, loss=0.134][A[A[A[A



2/3 * Epoch (train):   4% 163/3657 [00:50<17:18,  3.36it/s, loss=0.242][A[A[A[A



2/3 * Epoch (train):   4% 164/3657 [00:50<17:17,  3.37it/s, loss=0.242][A[A[A[A



2/3 * Epoch (train):   4% 164/3657 [00:50<17:17,  3.37it/s, loss=0.295][A[A[A[A



2/3 * Epoch (train):   5% 165/3657 [00:50<17:18,  3.36it/s, loss=0.295][A[A[A[A



2/3 * Epoch (train):   5% 165/3657 [00:50<17:18,  3.36it/s, loss=0.075][A[A[A[A



2/3 * Epoch (train):   5% 166/3657 [00:50<17:06,  3.40it/s, loss=0.075][A[A[A[A



2/3 * Epoch (train):   5% 166/3657 [00:50<17:06,  3.40it/s, loss=0.121][A[A[A[A



2/3 * Epoch (train):   5% 167/3657 [00:50<1

2/3 * Epoch (train):   6% 208/3657 [01:04<19:28,  2.95it/s, loss=0.183][A[A[A[A



2/3 * Epoch (train):   6% 209/3657 [01:04<19:08,  3.00it/s, loss=0.183][A[A[A[A



2/3 * Epoch (train):   6% 209/3657 [01:04<19:08,  3.00it/s, loss=0.353][A[A[A[A



2/3 * Epoch (train):   6% 210/3657 [01:04<18:52,  3.04it/s, loss=0.353][A[A[A[A



2/3 * Epoch (train):   6% 210/3657 [01:05<18:52,  3.04it/s, loss=0.018][A[A[A[A



2/3 * Epoch (train):   6% 211/3657 [01:05<18:41,  3.07it/s, loss=0.018][A[A[A[A



2/3 * Epoch (train):   6% 211/3657 [01:05<18:41,  3.07it/s, loss=0.319][A[A[A[A



2/3 * Epoch (train):   6% 212/3657 [01:05<18:35,  3.09it/s, loss=0.319][A[A[A[A



2/3 * Epoch (train):   6% 212/3657 [01:05<18:35,  3.09it/s, loss=66.340][A[A[A[A



2/3 * Epoch (train):   6% 213/3657 [01:05<18:37,  3.08it/s, loss=66.340][A[A[A[A



2/3 * Epoch (train):   6% 213/3657 [01:06<18:37,  3.08it/s, loss=0.103] [A[A[A[A



2/3 * Epoch (train):   6% 214/3657 [01:0

2/3 * Epoch (train):   7% 255/3657 [01:19<15:39,  3.62it/s, loss=0.049][A[A[A[A



2/3 * Epoch (train):   7% 256/3657 [01:19<15:44,  3.60it/s, loss=0.049][A[A[A[A



2/3 * Epoch (train):   7% 256/3657 [01:19<15:44,  3.60it/s, loss=0.089][A[A[A[A



2/3 * Epoch (train):   7% 257/3657 [01:19<15:39,  3.62it/s, loss=0.089][A[A[A[A



2/3 * Epoch (train):   7% 257/3657 [01:19<15:39,  3.62it/s, loss=0.186][A[A[A[A



2/3 * Epoch (train):   7% 258/3657 [01:19<16:03,  3.53it/s, loss=0.186][A[A[A[A



2/3 * Epoch (train):   7% 258/3657 [01:20<16:03,  3.53it/s, loss=0.100][A[A[A[A



2/3 * Epoch (train):   7% 259/3657 [01:20<16:26,  3.45it/s, loss=0.100][A[A[A[A



2/3 * Epoch (train):   7% 259/3657 [01:20<16:26,  3.45it/s, loss=0.088][A[A[A[A



2/3 * Epoch (train):   7% 260/3657 [01:20<16:21,  3.46it/s, loss=0.088][A[A[A[A



2/3 * Epoch (train):   7% 260/3657 [01:20<16:21,  3.46it/s, loss=0.035][A[A[A[A



2/3 * Epoch (train):   7% 261/3657 [01:20<1

2/3 * Epoch (train):   8% 302/3657 [01:32<14:54,  3.75it/s, loss=0.086][A[A[A[A



2/3 * Epoch (train):   8% 303/3657 [01:32<14:53,  3.75it/s, loss=0.086][A[A[A[A



2/3 * Epoch (train):   8% 303/3657 [01:32<14:53,  3.75it/s, loss=0.053][A[A[A[A



2/3 * Epoch (train):   8% 304/3657 [01:32<14:49,  3.77it/s, loss=0.053][A[A[A[A



2/3 * Epoch (train):   8% 304/3657 [01:32<14:49,  3.77it/s, loss=0.876][A[A[A[A



2/3 * Epoch (train):   8% 305/3657 [01:32<14:51,  3.76it/s, loss=0.876][A[A[A[A



2/3 * Epoch (train):   8% 305/3657 [01:33<14:51,  3.76it/s, loss=0.091][A[A[A[A



2/3 * Epoch (train):   8% 306/3657 [01:33<14:53,  3.75it/s, loss=0.091][A[A[A[A



2/3 * Epoch (train):   8% 306/3657 [01:33<14:53,  3.75it/s, loss=0.044][A[A[A[A



2/3 * Epoch (train):   8% 307/3657 [01:33<14:41,  3.80it/s, loss=0.044][A[A[A[A



2/3 * Epoch (train):   8% 307/3657 [01:33<14:41,  3.80it/s, loss=0.150][A[A[A[A



2/3 * Epoch (train):   8% 308/3657 [01:33<1

2/3 * Epoch (train):  10% 349/3657 [01:45<14:48,  3.72it/s, loss=0.229][A[A[A[A



2/3 * Epoch (train):  10% 350/3657 [01:45<14:43,  3.74it/s, loss=0.229][A[A[A[A



2/3 * Epoch (train):  10% 350/3657 [01:46<14:43,  3.74it/s, loss=0.064][A[A[A[A



2/3 * Epoch (train):  10% 351/3657 [01:46<14:38,  3.76it/s, loss=0.064][A[A[A[A



2/3 * Epoch (train):  10% 351/3657 [01:46<14:38,  3.76it/s, loss=0.067][A[A[A[A



2/3 * Epoch (train):  10% 352/3657 [01:46<14:35,  3.77it/s, loss=0.067][A[A[A[A



2/3 * Epoch (train):  10% 352/3657 [01:46<14:35,  3.77it/s, loss=0.131][A[A[A[A



2/3 * Epoch (train):  10% 353/3657 [01:46<14:35,  3.77it/s, loss=0.131][A[A[A[A



2/3 * Epoch (train):  10% 353/3657 [01:46<14:35,  3.77it/s, loss=0.153][A[A[A[A



2/3 * Epoch (train):  10% 354/3657 [01:46<14:35,  3.77it/s, loss=0.153][A[A[A[A



2/3 * Epoch (train):  10% 354/3657 [01:47<14:35,  3.77it/s, loss=0.088][A[A[A[A



2/3 * Epoch (train):  10% 355/3657 [01:47<1

In [119]:
torch.save(model.state_dict(), './model_balance.pth')

In [120]:
# Results
model = Model()
model.load_state_dict(torch.load( f"{DATADIR}model_balance.pth"))
criterion = nn.MSELoss()
batch = next(iter(valid_loader))
output = model(batch['features'])

loss = criterion(output,batch['targets'])
print("Loss: ", loss)
print("Prediction: ", output*np.sqrt(disp)+mean)

Loss:  tensor(0.1382, grad_fn=<MseLossBackward>)
Prediction:  tensor([[  8530.9229],
        [  2237.5354],
        [ 32236.1836],
        [ 32738.2812],
        [ 31670.2070],
        [ 29873.0117],
        [ 39716.1992],
        [ 38400.8828],
        [ 26392.9277],
        [ 26533.6309],
        [ 35061.2695],
        [ 32983.9922],
        [-30158.9863],
        [-30803.9121],
        [  -620.5317],
        [-19605.6328],
        [ 35765.7969],
        [ 33961.1523],
        [ 37802.1328],
        [ 35757.5078],
        [-11247.3545],
        [-12581.0283],
        [ -2200.2246],
        [ -6601.7285],
        [-15263.5850],
        [-15113.0977],
        [ 30507.1055],
        [ 29300.2227],
        [ 41693.0898],
        [ 40304.7617],
        [ 35598.0000],
        [ 36432.2188],
        [-15704.2803],
        [-14870.8125],
        [ -9172.7266],
        [ -8689.7510],
        [ 41606.6641],
        [ 39472.2461],
        [ 41683.7500],
        [ 42309.9258],
        [ 16385.37