In [8]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/cosinesim-ft-talent-v1/COSINESIM_FINETUNE_DATASET_16k_final.csv
/kaggle/input/ml-talent-hack-parsedtrain/output.csv


## **paraphrase-multilingual-mpnet-base-v2 (Multilingual XLM-roBERTa) FINE-TUNING**

# Here, we assume our dataset for fine-tuning is fully ready (text corpuses are preproccessed, cosine similarity is calcluated):

In [2]:
df = pd.read_csv('/kaggle/input/cosinesim-ft-talent-v1/COSINESIM_FINETUNE_DATASET_16k_final.csv')
df.sample(3)

Unnamed: 0,CONCATED_x,CONCATED_y,COSINE_SIM
369,ЗП: 40000 руб.. Ищет работу на должность:: Инж...,title: Senior DevOps Engineer. salary: з/п не ...,0.517165
833,ЗП: 70000 руб.. Ищет работу на должность:: Про...,title: Бэкенд-разработчик. salary: от 120000 д...,0.521445
13180,ЗП: 55000 руб.. Ищет работу на должность:: Инж...,title: Go разработчик. salary: з/п не указана....,0.536804


## Let's import and initialize everything we need:

In [6]:
# !pip install sentence-transformers

In [38]:
from transformers import AutoTokenizer, AutoModel
from sentence_transformers.losses import ContrastiveLoss
from sentence_transformers import SentenceTransformer
import torch
import torch.nn as nn

model = AutoModel.from_pretrained('sentence-transformers/paraphrase-multilingual-mpnet-base-v2')
tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/paraphrase-multilingual-mpnet-base-v2')

#for clean PyTorch this can be used:
def mean_pooling(model_output, attention_mask):
    token_embeddings = model_output[0] 
    #as it contains all embeddings
    input_mask_expanded = attention_mask.unsqueeze(-1).expand(toke_embeddings.size()).float()
    #att mask size -> token embs size
    sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 1)
    #paddings to zero
    sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
    #normalization excluding zero
    
    return sum_embeddings / sum_mask #returning avg

#for simplicity let's use one already incl. pooling
model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-mpnet-base-v2')

modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/4.13k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

### Case without splitting texts into corpuses as we will fine-tune on the full corpus data embeddings' similarity with dropping several columns (check README for reference):

In [10]:
from sentence_transformers import InputExample

val_data = pd.read_csv('/kaggle/input/ml-talent-hack-parsedtrain/output.csv')
cols = list(np.array(val_data.columns))
print(cols)

['Vacancy UUID', 'Vacancy Name', 'Keywords', 'Description', 'Comment', 'Resume UUID', 'First Name', 'Last Name', 'Birth Date', 'Country', 'City', 'About', 'Key Skills', 'Starts', 'Ends', 'Employer', 'Experience City', 'Position', 'Experience Description', 'Year', 'Organization', 'Faculty', 'Specialty', 'Result', 'Education Type', 'Education Level', 'Target']


In [20]:
cols_drop1 = cols[0:9] + cols[13:16] + [cols[19]] + [cols[-1]]
resumes_val = val_data.drop(columns=cols_drop1)
vac_val = val_data.iloc[:, 1:5]
labels_val = np.array(pd.get_dummies(val_data.iloc[:, -1]).astype(int).drop(columns='failed'))

In [16]:
resumes_val_text = []
for i in range(len(resumes_val)):
    curr_str = ""
    for j in range(len(resumes_val.columns)):
        if not pd.isna(resumes_val.iloc[i, j]):
            curr_str += str(resumes_val.iloc[i, j]) + ". "
    resumes_val_text.append(curr_str)
vac_val_text = []
for i in range(len(vac_val)):
    curr_str = ""
    for j in range(len(vac_val.columns)):
        if not pd.isna(vac_val.iloc[i, j]):
            curr_str += str(vac_val.iloc[i, j]) + ". "
    vac_val_text.append(curr_str)

In [21]:
len(vac_val_text), len(resumes_val_text), len(labels_val)

(656, 656, 656)

In [148]:
from torch.utils.data import DataLoader

val_dataset = [InputExample(texts=[resumes_val_text[i], vac_val_text[i]], label=labels_val[i]) for i in range(len(vac_val_text))]
train_dataset = [InputExample(texts=[df.iloc[i, 0], df.iloc[i, 1]], label=df.iloc[i, 2]) for i in range(len(df))]

def collate_fn(batch):
    texts = [example.texts for example in batch]
    labels = torch.tensor([example.label for example in batch])
    return texts, labels

train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=32, collate_fn=collate_fn)
val_dataloader = DataLoader(val_dataset, shuffle=False, batch_size=16, collate_fn=collate_fn)

# Testing metrics first:
### With threshold = 0.65

In [76]:
from tqdm import tqdm
from IPython.display import clear_output
eval_preds, eval_labels = [], []

model.eval()
with torch.no_grad():
    for batch in tqdm(val_dataloader):
        clear_output()
        texts, labels = batch
        emb1 = model.encode([x[0] for x in texts], convert_to_tensor=True, normalize_embeddings=False)
        emb2 = model.encode([x[1] for x in texts], convert_to_tensor=True, normalize_embeddings=False)
        cos_sim = nn.functional.cosine_similarity(emb1, emb2)
        preds = (cos_sim > 0.65).int()
        eval_preds.append(preds.cpu().tolist())
        eval_labels.append(labels.cpu().tolist())

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

100%|██████████| 41/41 [00:06<00:00,  6.42it/s]


In [77]:
eval_preds = np.array(eval_preds).reshape(-1, 1)
eval_labels = np.array(eval_labels).reshape(-1, 1)

In [78]:
from sklearn.metrics import classification_report

print(classification_report(eval_labels, eval_preds)) #DEFAULT

              precision    recall  f1-score   support

           0       0.70      0.97      0.82       463
           1       0.19      0.02      0.03       193

    accuracy                           0.69       656
   macro avg       0.45      0.49      0.42       656
weighted avg       0.55      0.69      0.58       656



In [162]:
from tqdm import tqdm
from IPython.display import clear_output
eval_preds, eval_labels = [], []

model.eval()
with torch.no_grad():
    for batch in tqdm(val_dataloader):
        clear_output()
        texts, labels = batch
        emb1 = model.encode([x[0] for x in texts], convert_to_tensor=True, normalize_embeddings=False)
        emb2 = model.encode([x[1] for x in texts], convert_to_tensor=True, normalize_embeddings=False)
        cos_sim = nn.functional.cosine_similarity(emb1, emb2)
        preds = (cos_sim > 0.65).int()
        eval_preds.append(preds.cpu().tolist())
        eval_labels.append(labels.cpu().tolist())

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

100%|██████████| 41/41 [00:06<00:00,  6.63it/s]


In [163]:
eval_preds = np.array(eval_preds).reshape(-1, 1)
eval_labels = np.array(eval_labels).reshape(-1, 1)

In [164]:
print(classification_report(eval_labels, eval_preds)) 

              precision    recall  f1-score   support

           0       0.70      0.97      0.82       463
           1       0.19      0.02      0.03       193

    accuracy                           0.69       656
   macro avg       0.45      0.49      0.42       656
weighted avg       0.55      0.69      0.58       656



### Acceptable, but we want more.

In [132]:
tqdm_bar = None
tqdm = None

# TODO: CUSTOM DATASET CLASS

In [None]:
from transformers import AdamW
from tqdm import tqdm
tqdm_notebook = tqdm
def nop(it, *a, **k):
    return it
tqdm_notebook.tqdm = nop

class ContrastiveLoss_v1(nn.Module):
    def __init__(self, margin=2.0):
        super(ContrastiveLoss_v1, self).__init__()
        self.margin = margin

    def forward(self, y1, y2, labels):
        # euclid
        euc_dist = nn.functional.pairwise_distance(y1, y2)
        
        # calc losses
        losses = (1 - labels) * torch.pow(euc_dist, 2) + \
                 labels * torch.pow(torch.clamp(self.margin - euc_dist, min=0.0), 2)
                 
        loss = torch.mean(losses)
        return loss


model.to('cuda')

loss_func = ContrastiveLoss_v1()
optimizer = AdamW(model.parameters(), lr=2e-5)

NUM_EPOCHS = 2

for epoch in tqdm(range(NUM_EPOCHS)):
    print(f"EPOCH {epoch+1}")
    model.train()
    total_loss = 0
    for step, batch in enumerate(tqdm(train_dataloader, desc=f"Epoch {epoch+1}", unit="batch")):
        optimizer.zero_grad()
        
        texts, labels = batch
        sentences_cv = [text[0] for text in texts]
        sentences_vac = [text[1] for text in texts]
        
        embs1 = model.encode(sentences_cv, convert_to_tensor=True, normalize_embeddings=False).requires_grad_()
        embs2 = model.encode(sentences_vac, convert_to_tensor=True, normalize_embeddings=False).requires_grad_()
        loss = loss_func(embs1.to('cuda'), embs2.to('cuda'), labels.to('cuda'))
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        
        if step % 100 == 1:
            print(f"Step {step}: Loss - {total_loss/step}")
        
    avg_loss = total_loss / len(train_dataloader)
    print(f"Training loss: {avg_loss:.4f}")
    
    ###val below
    
    model.eval()
    eval_preds, eval_preds1, eval_preds2 = [], [], []
    with torch.no_grad():
        val_loss = 0
        for step, batch in enumerate(val_dataloader):
            texts, labels = batch
            sentences_cv = [text[0] for text in texts]
            sentences_vac = [text[1] for text in texts]

            embs1 = model.encode(sentences_cv, convert_to_tensor=True, normalize_embeddings=False)
            embs2 = model.encode(sentences_vac, convert_to_tensor=True, normalize_embeddings=False)
            
            loss = loss_func(embs1.to('cuda'), embs2.to('cuda'), labels.to('cuda'))
            val_loss += loss.item()
            
            cos_sim = nn.functional.cosine_similarity(embs1, embs2)
            preds = (cos_sim > 0.4).int()
            preds1 = (cos_sim > 0.5).int()
            preds2 = (cos_sim > 0.6).int()
            eval_preds.append(preds.cpu().tolist())
            eval_preds1.append(preds1.cpu().tolist())
            eval_preds2.append(preds2.cpu().tolist())
            
    eval_preds = np.array(eval_preds).reshape(-1, 1)
    eval_preds1 = np.array(eval_preds1).reshape(-1, 1)
    eval_preds2 = np.array(eval_preds2).reshape(-1, 1)
    
            
    avg_val_loss = val_loss / len(val_dataloader)
    print(f"Validation loss: {avg_val_loss:.4f}\n")
    print("THRESHOLD 0.4:\n", classification_report(eval_labels, eval_preds))
    print("THRESHOLD 0.5:\n", classification_report(eval_labels, eval_preds1))
    print("THRESHOLD 0.6:\n", classification_report(eval_labels, eval_preds2))    

  0%|          | 0/2 [00:00<?, ?it/s]

EPOCH 1



Epoch 1:   0%|          | 0/497 [00:00<?, ?batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   0%|          | 1/497 [00:00<03:03,  2.70batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   0%|          | 2/497 [00:00<03:04,  2.69batch/s][A

Step 1: Loss - 3.940902376799845


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   1%|          | 3/497 [00:01<02:56,  2.79batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   1%|          | 4/497 [00:01<03:01,  2.72batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   1%|          | 5/497 [00:01<02:55,  2.81batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   1%|          | 6/497 [00:02<02:59,  2.74batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   1%|▏         | 7/497 [00:02<02:57,  2.76batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   2%|▏         | 8/497 [00:02<03:04,  2.64batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   2%|▏         | 9/497 [00:03<03:10,  2.56batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   2%|▏         | 10/497 [00:03<03:07,  2.60batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   2%|▏         | 11/497 [00:04<03:04,  2.64batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   2%|▏         | 12/497 [00:04<03:06,  2.60batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   3%|▎         | 13/497 [00:04<03:04,  2.62batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   3%|▎         | 14/497 [00:05<03:01,  2.66batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   3%|▎         | 15/497 [00:05<02:56,  2.73batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   3%|▎         | 16/497 [00:05<02:56,  2.73batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   3%|▎         | 17/497 [00:06<02:54,  2.76batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   4%|▎         | 18/497 [00:06<02:53,  2.76batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   4%|▍         | 19/497 [00:07<02:53,  2.76batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   4%|▍         | 20/497 [00:07<02:56,  2.70batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   4%|▍         | 21/497 [00:07<02:59,  2.65batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   4%|▍         | 22/497 [00:08<03:00,  2.63batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   5%|▍         | 23/497 [00:08<03:06,  2.54batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   5%|▍         | 24/497 [00:09<03:06,  2.54batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   5%|▌         | 25/497 [00:09<03:03,  2.57batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   5%|▌         | 26/497 [00:09<03:02,  2.57batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   5%|▌         | 27/497 [00:10<02:58,  2.63batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   6%|▌         | 28/497 [00:10<02:55,  2.68batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   6%|▌         | 29/497 [00:10<02:51,  2.72batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   6%|▌         | 30/497 [00:11<02:48,  2.77batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   6%|▌         | 31/497 [00:11<02:51,  2.71batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   6%|▋         | 32/497 [00:11<02:53,  2.69batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   7%|▋         | 33/497 [00:12<02:50,  2.72batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   7%|▋         | 34/497 [00:12<02:50,  2.72batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   7%|▋         | 35/497 [00:13<02:50,  2.71batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   7%|▋         | 36/497 [00:13<02:52,  2.67batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   7%|▋         | 37/497 [00:13<02:47,  2.75batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   8%|▊         | 38/497 [00:14<02:47,  2.74batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   8%|▊         | 39/497 [00:14<02:53,  2.64batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   8%|▊         | 40/497 [00:14<02:48,  2.71batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   8%|▊         | 41/497 [00:15<02:47,  2.72batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   8%|▊         | 42/497 [00:15<02:50,  2.67batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   9%|▊         | 43/497 [00:16<02:48,  2.70batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   9%|▉         | 44/497 [00:16<02:48,  2.69batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   9%|▉         | 45/497 [00:16<02:46,  2.71batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   9%|▉         | 46/497 [00:17<02:45,  2.73batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:   9%|▉         | 47/497 [00:17<02:48,  2.67batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  10%|▉         | 48/497 [00:17<02:48,  2.66batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  10%|▉         | 49/497 [00:18<02:47,  2.68batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  10%|█         | 50/497 [00:18<02:52,  2.59batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  10%|█         | 51/497 [00:19<02:50,  2.61batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  10%|█         | 52/497 [00:19<02:50,  2.61batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  11%|█         | 53/497 [00:19<02:54,  2.54batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  11%|█         | 54/497 [00:20<02:50,  2.60batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  11%|█         | 55/497 [00:20<02:48,  2.62batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  11%|█▏        | 56/497 [00:20<02:42,  2.71batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  11%|█▏        | 57/497 [00:21<02:41,  2.72batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  12%|█▏        | 58/497 [00:21<02:40,  2.73batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  12%|█▏        | 59/497 [00:22<02:35,  2.81batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  12%|█▏        | 60/497 [00:22<02:37,  2.78batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  12%|█▏        | 61/497 [00:22<02:44,  2.65batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  12%|█▏        | 62/497 [00:23<02:41,  2.69batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  13%|█▎        | 63/497 [00:23<02:42,  2.67batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  13%|█▎        | 64/497 [00:23<02:44,  2.63batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  13%|█▎        | 65/497 [00:24<02:43,  2.64batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  13%|█▎        | 66/497 [00:24<02:42,  2.65batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  13%|█▎        | 67/497 [00:25<02:47,  2.56batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  14%|█▎        | 68/497 [00:25<02:50,  2.52batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  14%|█▍        | 69/497 [00:25<02:51,  2.50batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  14%|█▍        | 70/497 [00:26<02:44,  2.59batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  14%|█▍        | 71/497 [00:26<02:41,  2.63batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  14%|█▍        | 72/497 [00:26<02:37,  2.70batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  15%|█▍        | 73/497 [00:27<02:35,  2.73batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  15%|█▍        | 74/497 [00:27<02:35,  2.73batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  15%|█▌        | 75/497 [00:28<02:34,  2.73batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  15%|█▌        | 76/497 [00:28<02:37,  2.67batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  15%|█▌        | 77/497 [00:28<02:36,  2.68batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  16%|█▌        | 78/497 [00:29<02:33,  2.73batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  16%|█▌        | 79/497 [00:29<02:29,  2.81batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  16%|█▌        | 80/497 [00:29<02:29,  2.80batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  16%|█▋        | 81/497 [00:30<02:30,  2.76batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  16%|█▋        | 82/497 [00:30<02:28,  2.79batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  17%|█▋        | 83/497 [00:30<02:33,  2.69batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  17%|█▋        | 84/497 [00:31<02:32,  2.70batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  17%|█▋        | 85/497 [00:31<02:29,  2.75batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  17%|█▋        | 86/497 [00:32<02:28,  2.76batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  18%|█▊        | 87/497 [00:32<02:28,  2.75batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  18%|█▊        | 88/497 [00:32<02:31,  2.70batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  18%|█▊        | 89/497 [00:33<02:29,  2.73batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  18%|█▊        | 90/497 [00:33<02:32,  2.68batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  18%|█▊        | 91/497 [00:33<02:29,  2.72batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  19%|█▊        | 92/497 [00:34<02:30,  2.70batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  19%|█▊        | 93/497 [00:34<02:29,  2.69batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  19%|█▉        | 94/497 [00:35<02:29,  2.70batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  19%|█▉        | 95/497 [00:35<02:29,  2.69batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  19%|█▉        | 96/497 [00:35<02:30,  2.67batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  20%|█▉        | 97/497 [00:36<02:33,  2.61batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  20%|█▉        | 98/497 [00:36<02:29,  2.66batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  20%|█▉        | 99/497 [00:36<02:27,  2.69batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  20%|██        | 100/497 [00:37<02:27,  2.69batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  20%|██        | 101/497 [00:37<02:23,  2.77batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  21%|██        | 102/497 [00:37<02:21,  2.78batch/s][A

Step 101: Loss - 2.078084280551203


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  21%|██        | 103/497 [00:38<02:20,  2.80batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  21%|██        | 104/497 [00:38<02:24,  2.72batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  21%|██        | 105/497 [00:39<02:24,  2.70batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  21%|██▏       | 106/497 [00:39<02:20,  2.78batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  22%|██▏       | 107/497 [00:39<02:18,  2.82batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  22%|██▏       | 108/497 [00:40<02:16,  2.84batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  22%|██▏       | 109/497 [00:40<02:19,  2.79batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  22%|██▏       | 110/497 [00:40<02:19,  2.78batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  22%|██▏       | 111/497 [00:41<02:23,  2.69batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  23%|██▎       | 112/497 [00:41<02:21,  2.72batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  23%|██▎       | 113/497 [00:41<02:18,  2.77batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  23%|██▎       | 114/497 [00:42<02:18,  2.77batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  23%|██▎       | 115/497 [00:42<02:18,  2.76batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  23%|██▎       | 116/497 [00:43<02:20,  2.70batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  24%|██▎       | 117/497 [00:43<02:20,  2.70batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  24%|██▎       | 118/497 [00:43<02:19,  2.71batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  24%|██▍       | 119/497 [00:44<02:17,  2.76batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  24%|██▍       | 120/497 [00:44<02:19,  2.69batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  24%|██▍       | 121/497 [00:44<02:17,  2.73batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  25%|██▍       | 122/497 [00:45<02:17,  2.73batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  25%|██▍       | 123/497 [00:45<02:18,  2.70batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  25%|██▍       | 124/497 [00:46<02:19,  2.66batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  25%|██▌       | 125/497 [00:46<02:17,  2.70batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  25%|██▌       | 126/497 [00:46<02:17,  2.70batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  26%|██▌       | 127/497 [00:47<02:18,  2.67batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  26%|██▌       | 128/497 [00:47<02:19,  2.65batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  26%|██▌       | 129/497 [00:47<02:23,  2.57batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  26%|██▌       | 130/497 [00:48<02:23,  2.56batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  26%|██▋       | 131/497 [00:48<02:20,  2.60batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  27%|██▋       | 132/497 [00:49<02:20,  2.61batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  27%|██▋       | 133/497 [00:49<02:17,  2.65batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  27%|██▋       | 134/497 [00:49<02:16,  2.67batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  27%|██▋       | 135/497 [00:50<02:16,  2.66batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  27%|██▋       | 136/497 [00:50<02:14,  2.68batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  28%|██▊       | 137/497 [00:50<02:08,  2.80batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  28%|██▊       | 138/497 [00:51<02:11,  2.74batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  28%|██▊       | 139/497 [00:51<02:11,  2.72batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  28%|██▊       | 140/497 [00:51<02:07,  2.80batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  28%|██▊       | 141/497 [00:52<02:04,  2.86batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  29%|██▊       | 142/497 [00:52<02:08,  2.76batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  29%|██▉       | 143/497 [00:53<02:08,  2.76batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  29%|██▉       | 144/497 [00:53<02:11,  2.68batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  29%|██▉       | 145/497 [00:53<02:08,  2.74batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  29%|██▉       | 146/497 [00:54<02:13,  2.63batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  30%|██▉       | 147/497 [00:54<02:12,  2.63batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  30%|██▉       | 148/497 [00:55<02:13,  2.61batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  30%|██▉       | 149/497 [00:55<02:14,  2.58batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  30%|███       | 150/497 [00:55<02:15,  2.55batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  30%|███       | 151/497 [00:56<02:11,  2.62batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  31%|███       | 152/497 [00:56<02:13,  2.59batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  31%|███       | 153/497 [00:56<02:13,  2.57batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  31%|███       | 154/497 [00:57<02:17,  2.49batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  31%|███       | 155/497 [00:57<02:11,  2.60batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  31%|███▏      | 156/497 [00:58<02:10,  2.61batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  32%|███▏      | 157/497 [00:58<02:07,  2.66batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  32%|███▏      | 158/497 [00:58<02:04,  2.72batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  32%|███▏      | 159/497 [00:59<02:02,  2.75batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  32%|███▏      | 160/497 [00:59<02:02,  2.76batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  32%|███▏      | 161/497 [00:59<02:02,  2.75batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  33%|███▎      | 162/497 [01:00<02:03,  2.70batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  33%|███▎      | 163/497 [01:00<02:05,  2.67batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  33%|███▎      | 164/497 [01:01<02:07,  2.62batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  33%|███▎      | 165/497 [01:01<02:05,  2.66batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  33%|███▎      | 166/497 [01:01<02:06,  2.62batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  34%|███▎      | 167/497 [01:02<02:09,  2.55batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  34%|███▍      | 168/497 [01:02<02:07,  2.58batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  34%|███▍      | 169/497 [01:03<02:07,  2.58batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  34%|███▍      | 170/497 [01:03<02:03,  2.65batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  34%|███▍      | 171/497 [01:03<02:01,  2.69batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  35%|███▍      | 172/497 [01:04<01:59,  2.72batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  35%|███▍      | 173/497 [01:04<01:59,  2.70batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  35%|███▌      | 174/497 [01:04<01:59,  2.71batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  35%|███▌      | 175/497 [01:05<01:56,  2.77batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  35%|███▌      | 176/497 [01:05<01:55,  2.77batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  36%|███▌      | 177/497 [01:05<01:55,  2.78batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  36%|███▌      | 178/497 [01:06<01:56,  2.75batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  36%|███▌      | 179/497 [01:06<01:53,  2.79batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  36%|███▌      | 180/497 [01:06<01:52,  2.82batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  36%|███▋      | 181/497 [01:07<01:55,  2.73batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  37%|███▋      | 182/497 [01:07<01:53,  2.78batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  37%|███▋      | 183/497 [01:08<01:50,  2.84batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  37%|███▋      | 184/497 [01:08<01:52,  2.79batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  37%|███▋      | 185/497 [01:08<01:55,  2.71batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  37%|███▋      | 186/497 [01:09<01:56,  2.66batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  38%|███▊      | 187/497 [01:09<01:56,  2.65batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  38%|███▊      | 188/497 [01:09<01:57,  2.62batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  38%|███▊      | 189/497 [01:10<01:56,  2.64batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  38%|███▊      | 190/497 [01:10<01:56,  2.64batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  38%|███▊      | 191/497 [01:11<01:57,  2.61batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  39%|███▊      | 192/497 [01:11<01:56,  2.62batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  39%|███▉      | 193/497 [01:11<01:55,  2.63batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  39%|███▉      | 194/497 [01:12<01:54,  2.65batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  39%|███▉      | 195/497 [01:12<01:53,  2.65batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  39%|███▉      | 196/497 [01:12<01:51,  2.69batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  40%|███▉      | 197/497 [01:13<01:48,  2.76batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  40%|███▉      | 198/497 [01:13<01:48,  2.75batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  40%|████      | 199/497 [01:14<01:51,  2.68batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  40%|████      | 200/497 [01:14<01:52,  2.65batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  40%|████      | 201/497 [01:14<01:51,  2.65batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  41%|████      | 202/497 [01:15<01:48,  2.73batch/s][A

Step 201: Loss - 2.066258566551945


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  41%|████      | 203/497 [01:15<01:51,  2.63batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  41%|████      | 204/497 [01:15<01:51,  2.64batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  41%|████      | 205/497 [01:16<01:47,  2.72batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  41%|████▏     | 206/497 [01:16<01:46,  2.74batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  42%|████▏     | 207/497 [01:16<01:43,  2.80batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  42%|████▏     | 208/497 [01:17<01:43,  2.79batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  42%|████▏     | 209/497 [01:17<01:47,  2.69batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  42%|████▏     | 210/497 [01:18<01:45,  2.72batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  42%|████▏     | 211/497 [01:18<01:47,  2.67batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  43%|████▎     | 212/497 [01:18<01:47,  2.66batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  43%|████▎     | 213/497 [01:19<01:44,  2.73batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  43%|████▎     | 214/497 [01:19<01:41,  2.79batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  43%|████▎     | 215/497 [01:19<01:43,  2.72batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  43%|████▎     | 216/497 [01:20<01:44,  2.70batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  44%|████▎     | 217/497 [01:20<01:43,  2.70batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  44%|████▍     | 218/497 [01:21<01:43,  2.70batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  44%|████▍     | 219/497 [01:21<01:42,  2.72batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  44%|████▍     | 220/497 [01:21<01:41,  2.73batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  44%|████▍     | 221/497 [01:22<01:42,  2.68batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  45%|████▍     | 222/497 [01:22<01:42,  2.67batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  45%|████▍     | 223/497 [01:22<01:43,  2.65batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  45%|████▌     | 224/497 [01:23<01:41,  2.69batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  45%|████▌     | 225/497 [01:23<01:40,  2.71batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  45%|████▌     | 226/497 [01:24<01:41,  2.67batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  46%|████▌     | 227/497 [01:24<01:38,  2.73batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  46%|████▌     | 228/497 [01:24<01:37,  2.75batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  46%|████▌     | 229/497 [01:25<01:38,  2.72batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  46%|████▋     | 230/497 [01:25<01:38,  2.72batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  46%|████▋     | 231/497 [01:25<01:39,  2.67batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  47%|████▋     | 232/497 [01:26<01:35,  2.76batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  47%|████▋     | 233/497 [01:26<01:35,  2.76batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  47%|████▋     | 234/497 [01:26<01:36,  2.74batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  47%|████▋     | 235/497 [01:27<01:34,  2.76batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  47%|████▋     | 236/497 [01:27<01:34,  2.76batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  48%|████▊     | 237/497 [01:28<01:39,  2.61batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  48%|████▊     | 238/497 [01:28<01:42,  2.52batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  48%|████▊     | 239/497 [01:28<01:40,  2.56batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  48%|████▊     | 240/497 [01:29<01:37,  2.63batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  48%|████▊     | 241/497 [01:29<01:35,  2.67batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  49%|████▊     | 242/497 [01:30<01:38,  2.60batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  49%|████▉     | 243/497 [01:30<01:35,  2.65batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  49%|████▉     | 244/497 [01:30<01:34,  2.69batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  49%|████▉     | 245/497 [01:31<01:32,  2.74batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  49%|████▉     | 246/497 [01:31<01:29,  2.79batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  50%|████▉     | 247/497 [01:31<01:32,  2.69batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  50%|████▉     | 248/497 [01:32<01:32,  2.69batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  50%|█████     | 249/497 [01:32<01:30,  2.73batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  50%|█████     | 250/497 [01:32<01:28,  2.79batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  51%|█████     | 251/497 [01:33<01:29,  2.76batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  51%|█████     | 252/497 [01:33<01:30,  2.71batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  51%|█████     | 253/497 [01:34<01:30,  2.70batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  51%|█████     | 254/497 [01:34<01:30,  2.68batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  51%|█████▏    | 255/497 [01:34<01:31,  2.65batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  52%|█████▏    | 256/497 [01:35<01:31,  2.63batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  52%|█████▏    | 257/497 [01:35<01:32,  2.60batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  52%|█████▏    | 258/497 [01:35<01:29,  2.67batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  52%|█████▏    | 259/497 [01:36<01:28,  2.68batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  52%|█████▏    | 260/497 [01:36<01:27,  2.72batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  53%|█████▎    | 261/497 [01:37<01:26,  2.73batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  53%|█████▎    | 262/497 [01:37<01:28,  2.65batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  53%|█████▎    | 263/497 [01:37<01:27,  2.69batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  53%|█████▎    | 264/497 [01:38<01:29,  2.62batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  53%|█████▎    | 265/497 [01:38<01:28,  2.63batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  54%|█████▎    | 266/497 [01:38<01:26,  2.68batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  54%|█████▎    | 267/497 [01:39<01:25,  2.70batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  54%|█████▍    | 268/497 [01:39<01:25,  2.67batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  54%|█████▍    | 269/497 [01:40<01:24,  2.70batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  54%|█████▍    | 270/497 [01:40<01:23,  2.72batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  55%|█████▍    | 271/497 [01:40<01:23,  2.71batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  55%|█████▍    | 272/497 [01:41<01:22,  2.72batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  55%|█████▍    | 273/497 [01:41<01:22,  2.71batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  55%|█████▌    | 274/497 [01:41<01:24,  2.65batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  55%|█████▌    | 275/497 [01:42<01:22,  2.70batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  56%|█████▌    | 276/497 [01:42<01:20,  2.73batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  56%|█████▌    | 277/497 [01:42<01:18,  2.80batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  56%|█████▌    | 278/497 [01:43<01:19,  2.77batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  56%|█████▌    | 279/497 [01:43<01:20,  2.72batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  56%|█████▋    | 280/497 [01:44<01:19,  2.74batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  57%|█████▋    | 281/497 [01:44<01:19,  2.73batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  57%|█████▋    | 282/497 [01:44<01:17,  2.78batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  57%|█████▋    | 283/497 [01:45<01:17,  2.77batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  57%|█████▋    | 284/497 [01:45<01:18,  2.72batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  57%|█████▋    | 285/497 [01:45<01:18,  2.69batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  58%|█████▊    | 286/497 [01:46<01:17,  2.72batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  58%|█████▊    | 287/497 [01:46<01:18,  2.68batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  58%|█████▊    | 288/497 [01:47<01:16,  2.71batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  58%|█████▊    | 289/497 [01:47<01:16,  2.72batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  58%|█████▊    | 290/497 [01:47<01:19,  2.61batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  59%|█████▊    | 291/497 [01:48<01:18,  2.64batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  59%|█████▉    | 292/497 [01:48<01:16,  2.67batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  59%|█████▉    | 293/497 [01:48<01:14,  2.75batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  59%|█████▉    | 294/497 [01:49<01:18,  2.59batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  59%|█████▉    | 295/497 [01:49<01:17,  2.61batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  60%|█████▉    | 296/497 [01:50<01:14,  2.69batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  60%|█████▉    | 297/497 [01:50<01:12,  2.78batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  60%|█████▉    | 298/497 [01:50<01:12,  2.73batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  60%|██████    | 299/497 [01:51<01:12,  2.74batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  60%|██████    | 300/497 [01:51<01:11,  2.76batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  61%|██████    | 301/497 [01:51<01:13,  2.66batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  61%|██████    | 302/497 [01:52<01:13,  2.67batch/s][A

Step 301: Loss - 2.06685935023324


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  61%|██████    | 303/497 [01:52<01:12,  2.68batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  61%|██████    | 304/497 [01:52<01:11,  2.70batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  61%|██████▏   | 305/497 [01:53<01:10,  2.74batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  62%|██████▏   | 306/497 [01:53<01:10,  2.70batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  62%|██████▏   | 307/497 [01:54<01:13,  2.59batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  62%|██████▏   | 308/497 [01:54<01:10,  2.67batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  62%|██████▏   | 309/497 [01:54<01:10,  2.68batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  62%|██████▏   | 310/497 [01:55<01:09,  2.69batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  63%|██████▎   | 311/497 [01:55<01:10,  2.64batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  63%|██████▎   | 312/497 [01:55<01:09,  2.68batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  63%|██████▎   | 313/497 [01:56<01:08,  2.70batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  63%|██████▎   | 314/497 [01:56<01:06,  2.75batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  63%|██████▎   | 315/497 [01:57<01:05,  2.77batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  64%|██████▎   | 316/497 [01:57<01:05,  2.77batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  64%|██████▍   | 317/497 [01:57<01:04,  2.78batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  64%|██████▍   | 318/497 [01:58<01:04,  2.79batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  64%|██████▍   | 319/497 [01:58<01:06,  2.69batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  64%|██████▍   | 320/497 [01:58<01:06,  2.67batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  65%|██████▍   | 321/497 [01:59<01:06,  2.66batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  65%|██████▍   | 322/497 [01:59<01:09,  2.50batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  65%|██████▍   | 323/497 [02:00<01:10,  2.47batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  65%|██████▌   | 324/497 [02:00<01:05,  2.66batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  65%|██████▌   | 325/497 [02:00<01:06,  2.58batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  66%|██████▌   | 326/497 [02:01<01:04,  2.63batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  66%|██████▌   | 327/497 [02:01<01:03,  2.67batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  66%|██████▌   | 328/497 [02:01<01:01,  2.73batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  66%|██████▌   | 329/497 [02:02<01:02,  2.68batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  66%|██████▋   | 330/497 [02:02<01:01,  2.71batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  67%|██████▋   | 331/497 [02:03<01:00,  2.72batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  67%|██████▋   | 332/497 [02:03<00:58,  2.81batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  67%|██████▋   | 333/497 [02:03<00:58,  2.78batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  67%|██████▋   | 334/497 [02:04<00:57,  2.84batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  67%|██████▋   | 335/497 [02:04<00:58,  2.76batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  68%|██████▊   | 336/497 [02:04<00:58,  2.77batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  68%|██████▊   | 337/497 [02:05<00:58,  2.74batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  68%|██████▊   | 338/497 [02:05<01:00,  2.63batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  68%|██████▊   | 339/497 [02:05<00:58,  2.69batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  68%|██████▊   | 340/497 [02:06<00:58,  2.68batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  69%|██████▊   | 341/497 [02:06<00:56,  2.75batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  69%|██████▉   | 342/497 [02:07<00:55,  2.80batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  69%|██████▉   | 343/497 [02:07<00:54,  2.81batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  69%|██████▉   | 344/497 [02:07<00:54,  2.81batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  69%|██████▉   | 345/497 [02:08<00:54,  2.81batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  70%|██████▉   | 346/497 [02:08<00:55,  2.70batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  70%|██████▉   | 347/497 [02:08<00:54,  2.74batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  70%|███████   | 348/497 [02:09<00:54,  2.73batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  70%|███████   | 349/497 [02:09<00:55,  2.69batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  70%|███████   | 350/497 [02:10<00:55,  2.66batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  71%|███████   | 351/497 [02:10<00:53,  2.73batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  71%|███████   | 352/497 [02:10<00:53,  2.71batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  71%|███████   | 353/497 [02:11<00:53,  2.68batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  71%|███████   | 354/497 [02:11<00:53,  2.66batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  71%|███████▏  | 355/497 [02:11<00:51,  2.73batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  72%|███████▏  | 356/497 [02:12<00:51,  2.76batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  72%|███████▏  | 357/497 [02:12<00:52,  2.69batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  72%|███████▏  | 358/497 [02:12<00:50,  2.77batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  72%|███████▏  | 359/497 [02:13<00:50,  2.72batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  72%|███████▏  | 360/497 [02:13<00:49,  2.75batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  73%|███████▎  | 361/497 [02:14<00:49,  2.73batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  73%|███████▎  | 362/497 [02:14<00:49,  2.71batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  73%|███████▎  | 363/497 [02:14<00:49,  2.69batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  73%|███████▎  | 364/497 [02:15<00:49,  2.69batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  73%|███████▎  | 365/497 [02:15<00:48,  2.71batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  74%|███████▎  | 366/497 [02:15<00:47,  2.74batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  74%|███████▍  | 367/497 [02:16<00:47,  2.72batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  74%|███████▍  | 368/497 [02:16<00:48,  2.67batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  74%|███████▍  | 369/497 [02:16<00:46,  2.73batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  74%|███████▍  | 370/497 [02:17<00:46,  2.75batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  75%|███████▍  | 371/497 [02:17<00:46,  2.73batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  75%|███████▍  | 372/497 [02:18<00:45,  2.76batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  75%|███████▌  | 373/497 [02:18<00:44,  2.76batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  75%|███████▌  | 374/497 [02:18<00:45,  2.72batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  75%|███████▌  | 375/497 [02:19<00:45,  2.69batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  76%|███████▌  | 376/497 [02:19<00:46,  2.61batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  76%|███████▌  | 377/497 [02:19<00:46,  2.59batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  76%|███████▌  | 378/497 [02:20<00:46,  2.55batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  76%|███████▋  | 379/497 [02:20<00:46,  2.53batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  76%|███████▋  | 380/497 [02:21<00:44,  2.60batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  77%|███████▋  | 381/497 [02:21<00:43,  2.65batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  77%|███████▋  | 382/497 [02:21<00:44,  2.57batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  77%|███████▋  | 383/497 [02:22<00:43,  2.63batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  77%|███████▋  | 384/497 [02:22<00:41,  2.70batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  77%|███████▋  | 385/497 [02:23<00:41,  2.68batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  78%|███████▊  | 386/497 [02:23<00:41,  2.65batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  78%|███████▊  | 387/497 [02:23<00:42,  2.57batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  78%|███████▊  | 388/497 [02:24<00:40,  2.66batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  78%|███████▊  | 389/497 [02:24<00:40,  2.69batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  78%|███████▊  | 390/497 [02:24<00:40,  2.66batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  79%|███████▊  | 391/497 [02:25<00:39,  2.69batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  79%|███████▉  | 392/497 [02:25<00:37,  2.80batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  79%|███████▉  | 393/497 [02:25<00:37,  2.78batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  79%|███████▉  | 394/497 [02:26<00:36,  2.79batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  79%|███████▉  | 395/497 [02:26<00:36,  2.77batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1:  80%|███████▉  | 396/497 [02:27<00:36,  2.80batch/s][A

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [156]:
model

SentenceTransformer(
  (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: XLMRobertaModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)

In [96]:
losses.ContrastiveLoss()

pytorch_metric_learning.losses.contrastive_loss.ContrastiveLoss

In [86]:
2e-5 > 0

True