In [37]:
pip install torch



In [38]:
pip install transformers



In [39]:
from transformers import GPT2TokenizerFast, GPT2LMHeadModel
#from transformers import BertConfig, BertModel
tokenizer = GPT2TokenizerFast.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')
#model = 'bert-base-uncased'.from_pretrained('bert-base-uncased')

In [40]:
SPECIAL_TOKENS_DICT = {
    'pad_token': '<pad>',
    'additional_special_tokens': ['<context>', '<slogan>'],
}

tokenizer.add_special_tokens(SPECIAL_TOKENS_DICT)
model.resize_token_embeddings(len(tokenizer))

Embedding(50260, 768)

In [41]:
import csv, torch
import regex as re
from torch.utils.data import Dataset

class SloganDataset(Dataset):
  def __init__(self, filename, tokenizer, seq_length=64):

    context_tkn = tokenizer.additional_special_tokens_ids[0]
    slogan_tkn = tokenizer.additional_special_tokens_ids[1]
    pad_tkn = tokenizer.pad_token_id
    eos_tkn = tokenizer.eos_token_id

    self.examples = []
    with open(filename, 'r', encoding='utf8', errors='ignore') as csvfile:
      reader = csv.reader(csvfile)
      #reader[1] = reader[1].encode('windows-1252').decode('utf-8')
      for row in reader:
        print("1: ",row)
        emptyString = ""
        row[0] = re.sub(r"\xa0", " ", row[0])
        row[1] = re.sub(r"\xa0", " ", row[1])
        row[0] = re.sub(r"Ã", " ", row[0])
        row[1] = re.sub(r"Ã", " ", row[1])
        row[0] = re.sub(r"Ã‚Â\xa0", ",", row[0])
        row[1] = re.sub(r"Ã‚Â\xa0", ",", row[1])
        row[0] = re.sub(r"ÃƒÂ©", "e", row[0])
        row[1] = re.sub(r"ÃƒÂ©", "e", row[1])
        print("2: ",row)
        # Build the context and slogan segments:
        context = [context_tkn] + tokenizer.encode(row[0], max_length=seq_length//2-1)
        slogan = [slogan_tkn] + tokenizer.encode(row[1], max_length=seq_length//2-2) + [eos_tkn]

        # Concatenate the two parts together:
        tokens = context + slogan + [pad_tkn] * ( seq_length - len(context) - len(slogan) )

        # Annotate each token with its corresponding segment:
        segments = [context_tkn] * len(context) + [slogan_tkn] * ( seq_length - len(context) )

        # Ignore the context, padding, and <slogan> tokens by setting their labels to -1
        labels = [-100] * (len(context)+1) + slogan[1:] + [-100] * ( seq_length - len(context) - len(slogan) )

        # Add the preprocessed example to the dataset
        self.examples.append((tokens, segments, labels))

  def __len__(self):
    return len(self.examples)

  def __getitem__(self, item):
    return torch.tensor(self.examples[item])

In [43]:
slogan_dataset = SloganDataset('/content/slogans.csv', tokenizer)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
1:  ['Audi automobiles', 'Everyone dreams of an Audi.']
2:  ['Audi automobiles', 'Everyone dreams of an Audi.']
1:  ['Volkswagen, the German car brand', 'Volkswagen. Drivers wanted.']
2:  ['Volkswagen, the German car brand', 'Volkswagen. Drivers wanted.']
1:  ['Volkswagen, the German car brand', 'For the love of the car.']
2:  ['Volkswagen, the German car brand', 'For the love of the car.']
1:  ['Volkswagen, the German car brand', 'Relieves gas pains.']
2:  ['Volkswagen, the German car brand', 'Relieves gas pains.']
1:  ['Volkswagen, the German car brand', 'Surprisingly ordinary prices']
2:  ['Volkswagen, the German car brand', 'Surprisingly ordinary prices']
1:  ['Peugeot, the French automobiles', 'Peugeot. Live the pleasure.']
2:  ['Peugeot, the French automobiles', 'Peugeot. Live the pleasure.']
1:  ['Peugeot, the French automobiles', 'The drive of your life.']
2:  ['Peugeot, the French automobiles', 'The drive of your

In [44]:
import math, random
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler

# Create data indices for training and validation splits:

indices = list(range(len(slogan_dataset)))

random.seed(42)
random.shuffle(indices)

split = math.floor(0.1 * len(slogan_dataset))
train_indices, val_indices = indices[split:], indices[:split]

# Build the PyTorch data loaders:

train_sampler = SubsetRandomSampler(train_indices)
val_sampler = SubsetRandomSampler(val_indices)

train_loader = DataLoader(slogan_dataset, batch_size=32, sampler=train_sampler)
val_loader = DataLoader(slogan_dataset, batch_size=64, sampler=val_sampler)

In [45]:
import numpy as np
from tqdm import tqdm
def fit(model, optimizer, train_dl, val_dl, epochs=1, device=torch.device('cpu')):

  for i in range(epochs):

    print('\n--- Starting epoch #{} ---'.format(i))

    model.train()

    # These 2 lists will keep track of the batch losses and batch sizes over one epoch:
    losses = []
    nums = []

    for xb in tqdm(train_dl, desc="Training"):
      # Move the batch to the training device:
      print(len(xb))
      inputs = xb.to(device)
      print(inputs[:,1,:])
      # Call the model with the token ids, segment ids, and the ground truth (labels)
      outputs = model(inputs[:,0,:], token_type_ids=inputs[:,1,:], labels=inputs[:,2,:])

      # Add the loss and batch size to the list:
      loss = outputs[0]
      losses.append(loss.item())
      nums.append(len(xb))

      loss.backward()

      optimizer.step()
      model.zero_grad()

    # Compute the average cost over one epoch:
    train_cost = np.sum(np.multiply(losses, nums)) / sum(nums)


    # Now do the same thing for validation:

    model.eval()

    with torch.no_grad():
      losses = []
      nums = []

      for xb in tqdm(val_dl, desc="Validation"):
        inputs = xb.to(device)
        outputs = model(inputs[:,0,:], token_type_ids=inputs[:,1,:], labels=inputs[:,2,:])
        losses.append(outputs[0].item())
        nums.append(len(xb))

    val_cost = np.sum(np.multiply(losses, nums)) / sum(nums)

    print('\n--- Epoch #{} finished --- Training cost: {} / Validation cost: {}'.format(i, train_cost, val_cost))

In [46]:
from transformers import AdamW

# Move the model to the GPU:
device = torch.device('cuda')
model.to(device)

# Fine-tune GPT2 for two epochs:
optimizer = AdamW(model.parameters())
fit(model, optimizer, train_loader, val_loader, epochs=2, device=device)

Training:   0%|          | 0/287 [00:00<?, ?it/s]


--- Starting epoch #0 ---
32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   0%|          | 1/287 [00:00<02:50,  1.68it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   1%|          | 2/287 [00:01<02:44,  1.74it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   1%|          | 3/287 [00:01<02:42,  1.75it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   1%|▏         | 4/287 [00:02<02:38,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   2%|▏         | 5/287 [00:02<02:38,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   2%|▏         | 6/287 [00:03<02:36,  1.79it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   2%|▏         | 7/287 [00:03<02:35,  1.80it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   3%|▎         | 8/287 [00:04<02:34,  1.81it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   3%|▎         | 9/287 [00:04<02:34,  1.80it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   3%|▎         | 10/287 [00:05<02:33,  1.80it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   4%|▍         | 11/287 [00:06<02:33,  1.80it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   4%|▍         | 12/287 [00:06<02:32,  1.80it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   5%|▍         | 13/287 [00:07<02:31,  1.81it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   5%|▍         | 14/287 [00:07<02:31,  1.80it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   5%|▌         | 15/287 [00:08<02:31,  1.79it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   6%|▌         | 16/287 [00:08<02:31,  1.79it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   6%|▌         | 17/287 [00:09<02:31,  1.79it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   6%|▋         | 18/287 [00:10<02:30,  1.79it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   7%|▋         | 19/287 [00:10<02:30,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   7%|▋         | 20/287 [00:11<02:30,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   7%|▋         | 21/287 [00:11<02:29,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   8%|▊         | 22/287 [00:12<02:29,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   8%|▊         | 23/287 [00:12<02:28,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   8%|▊         | 24/287 [00:13<02:28,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   9%|▊         | 25/287 [00:13<02:28,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   9%|▉         | 26/287 [00:14<02:27,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   9%|▉         | 27/287 [00:15<02:27,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  10%|▉         | 28/287 [00:15<02:26,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  10%|█         | 29/287 [00:16<02:26,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  10%|█         | 30/287 [00:16<02:26,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  11%|█         | 31/287 [00:17<02:26,  1.75it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  11%|█         | 32/287 [00:17<02:25,  1.75it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  11%|█▏        | 33/287 [00:18<02:25,  1.75it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  12%|█▏        | 34/287 [00:19<02:24,  1.75it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  12%|█▏        | 35/287 [00:19<02:24,  1.75it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  13%|█▎        | 36/287 [00:20<02:23,  1.74it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  13%|█▎        | 37/287 [00:20<02:23,  1.74it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  13%|█▎        | 38/287 [00:21<02:23,  1.74it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  14%|█▎        | 39/287 [00:21<02:22,  1.74it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  14%|█▍        | 40/287 [00:22<02:22,  1.73it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  14%|█▍        | 41/287 [00:23<02:22,  1.73it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  15%|█▍        | 42/287 [00:23<02:21,  1.73it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  15%|█▍        | 43/287 [00:24<02:21,  1.73it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  15%|█▌        | 44/287 [00:24<02:20,  1.73it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  16%|█▌        | 45/287 [00:25<02:20,  1.73it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  16%|█▌        | 46/287 [00:26<02:19,  1.73it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  16%|█▋        | 47/287 [00:26<02:18,  1.73it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  17%|█▋        | 48/287 [00:27<02:18,  1.73it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  17%|█▋        | 49/287 [00:27<02:18,  1.72it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  17%|█▋        | 50/287 [00:28<02:17,  1.72it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  18%|█▊        | 51/287 [00:28<02:17,  1.72it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  18%|█▊        | 52/287 [00:29<02:16,  1.72it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  18%|█▊        | 53/287 [00:30<02:15,  1.72it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  19%|█▉        | 54/287 [00:30<02:15,  1.72it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  19%|█▉        | 55/287 [00:31<02:14,  1.72it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  20%|█▉        | 56/287 [00:31<02:14,  1.72it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  20%|█▉        | 57/287 [00:32<02:13,  1.72it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  20%|██        | 58/287 [00:33<02:12,  1.73it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  21%|██        | 59/287 [00:33<02:12,  1.73it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  21%|██        | 60/287 [00:34<02:11,  1.73it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  21%|██▏       | 61/287 [00:34<02:10,  1.73it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  22%|██▏       | 62/287 [00:35<02:09,  1.73it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  22%|██▏       | 63/287 [00:35<02:09,  1.73it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  22%|██▏       | 64/287 [00:36<02:08,  1.74it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  23%|██▎       | 65/287 [00:37<02:07,  1.74it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  23%|██▎       | 66/287 [00:37<02:06,  1.74it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  23%|██▎       | 67/287 [00:38<02:05,  1.75it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  24%|██▎       | 68/287 [00:38<02:05,  1.75it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  24%|██▍       | 69/287 [00:39<02:04,  1.75it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  24%|██▍       | 70/287 [00:39<02:03,  1.75it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  25%|██▍       | 71/287 [00:40<02:03,  1.75it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  25%|██▌       | 72/287 [00:41<02:02,  1.75it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  25%|██▌       | 73/287 [00:41<02:02,  1.75it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  26%|██▌       | 74/287 [00:42<02:01,  1.75it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  26%|██▌       | 75/287 [00:42<02:00,  1.75it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  26%|██▋       | 76/287 [00:43<02:00,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  27%|██▋       | 77/287 [00:43<01:59,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  27%|██▋       | 78/287 [00:44<01:58,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  28%|██▊       | 79/287 [00:45<01:58,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  28%|██▊       | 80/287 [00:45<01:57,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  28%|██▊       | 81/287 [00:46<01:57,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  29%|██▊       | 82/287 [00:46<01:56,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  29%|██▉       | 83/287 [00:47<01:55,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  29%|██▉       | 84/287 [00:47<01:55,  1.75it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  30%|██▉       | 85/287 [00:48<01:54,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  30%|██▉       | 86/287 [00:48<01:53,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  30%|███       | 87/287 [00:49<01:52,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  31%|███       | 88/287 [00:50<01:52,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  31%|███       | 89/287 [00:50<01:51,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  31%|███▏      | 90/287 [00:51<01:50,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  32%|███▏      | 91/287 [00:51<01:50,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  32%|███▏      | 92/287 [00:52<01:49,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  32%|███▏      | 93/287 [00:52<01:49,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  33%|███▎      | 94/287 [00:53<01:48,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  33%|███▎      | 95/287 [00:54<01:47,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  33%|███▎      | 96/287 [00:54<01:47,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  34%|███▍      | 97/287 [00:55<01:46,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  34%|███▍      | 98/287 [00:55<01:46,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  34%|███▍      | 99/287 [00:56<01:45,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  35%|███▍      | 100/287 [00:56<01:44,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  35%|███▌      | 101/287 [00:57<01:44,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  36%|███▌      | 102/287 [00:57<01:43,  1.79it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  36%|███▌      | 103/287 [00:58<01:42,  1.79it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  36%|███▌      | 104/287 [00:59<01:42,  1.79it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  37%|███▋      | 105/287 [00:59<01:41,  1.79it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  37%|███▋      | 106/287 [01:00<01:41,  1.79it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  37%|███▋      | 107/287 [01:00<01:40,  1.79it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  38%|███▊      | 108/287 [01:01<01:39,  1.79it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  38%|███▊      | 109/287 [01:01<01:39,  1.79it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  38%|███▊      | 110/287 [01:02<01:38,  1.79it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  39%|███▊      | 111/287 [01:02<01:38,  1.80it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  39%|███▉      | 112/287 [01:03<01:37,  1.79it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  39%|███▉      | 113/287 [01:04<01:36,  1.80it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  40%|███▉      | 114/287 [01:04<01:36,  1.79it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  40%|████      | 115/287 [01:05<01:35,  1.80it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  40%|████      | 116/287 [01:05<01:35,  1.80it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  41%|████      | 117/287 [01:06<01:34,  1.80it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  41%|████      | 118/287 [01:06<01:34,  1.80it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  41%|████▏     | 119/287 [01:07<01:33,  1.80it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  42%|████▏     | 120/287 [01:07<01:32,  1.80it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  42%|████▏     | 121/287 [01:08<01:32,  1.80it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  43%|████▎     | 122/287 [01:09<01:31,  1.80it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  43%|████▎     | 123/287 [01:09<01:31,  1.80it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  43%|████▎     | 124/287 [01:10<01:30,  1.79it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  44%|████▎     | 125/287 [01:10<01:30,  1.80it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  44%|████▍     | 126/287 [01:11<01:29,  1.80it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  44%|████▍     | 127/287 [01:11<01:28,  1.80it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  45%|████▍     | 128/287 [01:12<01:28,  1.80it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  45%|████▍     | 129/287 [01:12<01:27,  1.80it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  45%|████▌     | 130/287 [01:13<01:27,  1.80it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  46%|████▌     | 131/287 [01:14<01:26,  1.80it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  46%|████▌     | 132/287 [01:14<01:26,  1.80it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  46%|████▋     | 133/287 [01:15<01:25,  1.80it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  47%|████▋     | 134/287 [01:15<01:25,  1.80it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  47%|████▋     | 135/287 [01:16<01:24,  1.80it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  47%|████▋     | 136/287 [01:16<01:24,  1.79it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  48%|████▊     | 137/287 [01:17<01:23,  1.80it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  48%|████▊     | 138/287 [01:17<01:23,  1.79it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  48%|████▊     | 139/287 [01:18<01:22,  1.79it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  49%|████▉     | 140/287 [01:19<01:22,  1.79it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  49%|████▉     | 141/287 [01:19<01:21,  1.79it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  49%|████▉     | 142/287 [01:20<01:20,  1.79it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  50%|████▉     | 143/287 [01:20<01:20,  1.79it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  50%|█████     | 144/287 [01:21<01:19,  1.79it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  51%|█████     | 145/287 [01:21<01:19,  1.79it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  51%|█████     | 146/287 [01:22<01:18,  1.79it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  51%|█████     | 147/287 [01:23<01:18,  1.79it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  52%|█████▏    | 148/287 [01:23<01:17,  1.79it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  52%|█████▏    | 149/287 [01:24<01:16,  1.80it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  52%|█████▏    | 150/287 [01:24<01:16,  1.79it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  53%|█████▎    | 151/287 [01:25<01:15,  1.79it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  53%|█████▎    | 152/287 [01:25<01:15,  1.79it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  53%|█████▎    | 153/287 [01:26<01:14,  1.79it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  54%|█████▎    | 154/287 [01:26<01:14,  1.79it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  54%|█████▍    | 155/287 [01:27<01:13,  1.79it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  54%|█████▍    | 156/287 [01:28<01:13,  1.79it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  55%|█████▍    | 157/287 [01:28<01:12,  1.79it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  55%|█████▌    | 158/287 [01:29<01:12,  1.79it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  55%|█████▌    | 159/287 [01:29<01:11,  1.79it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  56%|█████▌    | 160/287 [01:30<01:11,  1.79it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  56%|█████▌    | 161/287 [01:30<01:10,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  56%|█████▋    | 162/287 [01:31<01:10,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  57%|█████▋    | 163/287 [01:31<01:09,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  57%|█████▋    | 164/287 [01:32<01:09,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  57%|█████▋    | 165/287 [01:33<01:08,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  58%|█████▊    | 166/287 [01:33<01:07,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  58%|█████▊    | 167/287 [01:34<01:07,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  59%|█████▊    | 168/287 [01:34<01:06,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  59%|█████▉    | 169/287 [01:35<01:06,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  59%|█████▉    | 170/287 [01:35<01:05,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  60%|█████▉    | 171/287 [01:36<01:05,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  60%|█████▉    | 172/287 [01:37<01:04,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  60%|██████    | 173/287 [01:37<01:04,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  61%|██████    | 174/287 [01:38<01:03,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  61%|██████    | 175/287 [01:38<01:03,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  61%|██████▏   | 176/287 [01:39<01:02,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  62%|██████▏   | 177/287 [01:39<01:02,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  62%|██████▏   | 178/287 [01:40<01:01,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  62%|██████▏   | 179/287 [01:40<01:01,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  63%|██████▎   | 180/287 [01:41<01:00,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  63%|██████▎   | 181/287 [01:42<01:00,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  63%|██████▎   | 182/287 [01:42<00:59,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  64%|██████▍   | 183/287 [01:43<00:58,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  64%|██████▍   | 184/287 [01:43<00:58,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  64%|██████▍   | 185/287 [01:44<00:57,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  65%|██████▍   | 186/287 [01:44<00:57,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  65%|██████▌   | 187/287 [01:45<00:56,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  66%|██████▌   | 188/287 [01:46<00:56,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  66%|██████▌   | 189/287 [01:46<00:55,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  66%|██████▌   | 190/287 [01:47<00:55,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  67%|██████▋   | 191/287 [01:47<00:54,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  67%|██████▋   | 192/287 [01:48<00:53,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  67%|██████▋   | 193/287 [01:48<00:53,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  68%|██████▊   | 194/287 [01:49<00:52,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  68%|██████▊   | 195/287 [01:50<00:52,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  68%|██████▊   | 196/287 [01:50<00:51,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  69%|██████▊   | 197/287 [01:51<00:51,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  69%|██████▉   | 198/287 [01:51<00:50,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  69%|██████▉   | 199/287 [01:52<00:49,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  70%|██████▉   | 200/287 [01:52<00:49,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  70%|███████   | 201/287 [01:53<00:48,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  70%|███████   | 202/287 [01:54<00:48,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  71%|███████   | 203/287 [01:54<00:47,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  71%|███████   | 204/287 [01:55<00:47,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  71%|███████▏  | 205/287 [01:55<00:46,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  72%|███████▏  | 206/287 [01:56<00:46,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  72%|███████▏  | 207/287 [01:56<00:45,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  72%|███████▏  | 208/287 [01:57<00:44,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  73%|███████▎  | 209/287 [01:58<00:44,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  73%|███████▎  | 210/287 [01:58<00:43,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  74%|███████▎  | 211/287 [01:59<00:43,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  74%|███████▍  | 212/287 [01:59<00:42,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  74%|███████▍  | 213/287 [02:00<00:42,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  75%|███████▍  | 214/287 [02:00<00:41,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  75%|███████▍  | 215/287 [02:01<00:40,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  75%|███████▌  | 216/287 [02:02<00:40,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  76%|███████▌  | 217/287 [02:02<00:39,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  76%|███████▌  | 218/287 [02:03<00:39,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  76%|███████▋  | 219/287 [02:03<00:38,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  77%|███████▋  | 220/287 [02:04<00:37,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  77%|███████▋  | 221/287 [02:04<00:37,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  77%|███████▋  | 222/287 [02:05<00:36,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  78%|███████▊  | 223/287 [02:05<00:36,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  78%|███████▊  | 224/287 [02:06<00:35,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  78%|███████▊  | 225/287 [02:07<00:35,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  79%|███████▊  | 226/287 [02:07<00:34,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  79%|███████▉  | 227/287 [02:08<00:33,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  79%|███████▉  | 228/287 [02:08<00:33,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  80%|███████▉  | 229/287 [02:09<00:32,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  80%|████████  | 230/287 [02:09<00:32,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  80%|████████  | 231/287 [02:10<00:31,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  81%|████████  | 232/287 [02:11<00:31,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  81%|████████  | 233/287 [02:11<00:30,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  82%|████████▏ | 234/287 [02:12<00:29,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  82%|████████▏ | 235/287 [02:12<00:29,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  82%|████████▏ | 236/287 [02:13<00:28,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  83%|████████▎ | 237/287 [02:13<00:28,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  83%|████████▎ | 238/287 [02:14<00:27,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  83%|████████▎ | 239/287 [02:15<00:27,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  84%|████████▎ | 240/287 [02:15<00:26,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  84%|████████▍ | 241/287 [02:16<00:25,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  84%|████████▍ | 242/287 [02:16<00:25,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  85%|████████▍ | 243/287 [02:17<00:24,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  85%|████████▌ | 244/287 [02:17<00:24,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  85%|████████▌ | 245/287 [02:18<00:23,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  86%|████████▌ | 246/287 [02:18<00:23,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  86%|████████▌ | 247/287 [02:19<00:22,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  86%|████████▋ | 248/287 [02:20<00:22,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  87%|████████▋ | 249/287 [02:20<00:21,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  87%|████████▋ | 250/287 [02:21<00:20,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  87%|████████▋ | 251/287 [02:21<00:20,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  88%|████████▊ | 252/287 [02:22<00:19,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  88%|████████▊ | 253/287 [02:22<00:19,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  89%|████████▊ | 254/287 [02:23<00:18,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  89%|████████▉ | 255/287 [02:24<00:17,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  89%|████████▉ | 256/287 [02:24<00:17,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  90%|████████▉ | 257/287 [02:25<00:16,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  90%|████████▉ | 258/287 [02:25<00:16,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  90%|█████████ | 259/287 [02:26<00:15,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  91%|█████████ | 260/287 [02:26<00:15,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  91%|█████████ | 261/287 [02:27<00:14,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  91%|█████████▏| 262/287 [02:27<00:14,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  92%|█████████▏| 263/287 [02:28<00:13,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  92%|█████████▏| 264/287 [02:29<00:12,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  92%|█████████▏| 265/287 [02:29<00:12,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  93%|█████████▎| 266/287 [02:30<00:11,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  93%|█████████▎| 267/287 [02:30<00:11,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  93%|█████████▎| 268/287 [02:31<00:10,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  94%|█████████▎| 269/287 [02:31<00:10,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  94%|█████████▍| 270/287 [02:32<00:09,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  94%|█████████▍| 271/287 [02:33<00:09,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  95%|█████████▍| 272/287 [02:33<00:08,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  95%|█████████▌| 273/287 [02:34<00:07,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  95%|█████████▌| 274/287 [02:34<00:07,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  96%|█████████▌| 275/287 [02:35<00:06,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  96%|█████████▌| 276/287 [02:35<00:06,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  97%|█████████▋| 277/287 [02:36<00:05,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  97%|█████████▋| 278/287 [02:36<00:05,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  97%|█████████▋| 279/287 [02:37<00:04,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  98%|█████████▊| 280/287 [02:38<00:03,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  98%|█████████▊| 281/287 [02:38<00:03,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  98%|█████████▊| 282/287 [02:39<00:02,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  99%|█████████▊| 283/287 [02:39<00:02,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  99%|█████████▉| 284/287 [02:40<00:01,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  99%|█████████▉| 285/287 [02:40<00:01,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training: 100%|█████████▉| 286/287 [02:41<00:00,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50259,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training: 100%|██████████| 287/287 [02:42<00:00,  1.77it/s]
Validation: 100%|██████████| 16/16 [00:06<00:00,  2.56it/s]
Training:   0%|          | 0/287 [00:00<?, ?it/s]


--- Epoch #0 finished --- Training cost: 5.35646508213535 / Validation cost: 3.285345516952814

--- Starting epoch #1 ---
32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   0%|          | 1/287 [00:00<02:33,  1.87it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   1%|          | 2/287 [00:01<02:34,  1.84it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   1%|          | 3/287 [00:01<02:36,  1.82it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   1%|▏         | 4/287 [00:02<02:36,  1.81it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   2%|▏         | 5/287 [00:02<02:36,  1.80it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   2%|▏         | 6/287 [00:03<02:36,  1.79it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   2%|▏         | 7/287 [00:03<02:36,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   3%|▎         | 8/287 [00:04<02:36,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   3%|▎         | 9/287 [00:05<02:36,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   3%|▎         | 10/287 [00:05<02:35,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   4%|▍         | 11/287 [00:06<02:35,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   4%|▍         | 12/287 [00:06<02:34,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   5%|▍         | 13/287 [00:07<02:34,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   5%|▍         | 14/287 [00:07<02:33,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   5%|▌         | 15/287 [00:08<02:33,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   6%|▌         | 16/287 [00:08<02:32,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   6%|▌         | 17/287 [00:09<02:32,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   6%|▋         | 18/287 [00:10<02:31,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   7%|▋         | 19/287 [00:10<02:31,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   7%|▋         | 20/287 [00:11<02:30,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   7%|▋         | 21/287 [00:11<02:30,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   8%|▊         | 22/287 [00:12<02:29,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   8%|▊         | 23/287 [00:12<02:28,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   8%|▊         | 24/287 [00:13<02:28,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   9%|▊         | 25/287 [00:14<02:27,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   9%|▉         | 26/287 [00:14<02:26,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:   9%|▉         | 27/287 [00:15<02:26,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  10%|▉         | 28/287 [00:15<02:25,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  10%|█         | 29/287 [00:16<02:25,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  10%|█         | 30/287 [00:16<02:24,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  11%|█         | 31/287 [00:17<02:23,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  11%|█         | 32/287 [00:17<02:23,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  11%|█▏        | 33/287 [00:18<02:22,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  12%|█▏        | 34/287 [00:19<02:22,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  12%|█▏        | 35/287 [00:19<02:21,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  13%|█▎        | 36/287 [00:20<02:21,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  13%|█▎        | 37/287 [00:20<02:20,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  13%|█▎        | 38/287 [00:21<02:20,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  14%|█▎        | 39/287 [00:21<02:19,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  14%|█▍        | 40/287 [00:22<02:19,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  14%|█▍        | 41/287 [00:23<02:18,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  15%|█▍        | 42/287 [00:23<02:18,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  15%|█▍        | 43/287 [00:24<02:17,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  15%|█▌        | 44/287 [00:24<02:17,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  16%|█▌        | 45/287 [00:25<02:16,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  16%|█▌        | 46/287 [00:25<02:16,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  16%|█▋        | 47/287 [00:26<02:15,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  17%|█▋        | 48/287 [00:27<02:14,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  17%|█▋        | 49/287 [00:27<02:14,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  17%|█▋        | 50/287 [00:28<02:13,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  18%|█▊        | 51/287 [00:28<02:13,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  18%|█▊        | 52/287 [00:29<02:12,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  18%|█▊        | 53/287 [00:29<02:12,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  19%|█▉        | 54/287 [00:30<02:11,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  19%|█▉        | 55/287 [00:30<02:11,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  20%|█▉        | 56/287 [00:31<02:11,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  20%|█▉        | 57/287 [00:32<02:10,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  20%|██        | 58/287 [00:32<02:09,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  21%|██        | 59/287 [00:33<02:08,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  21%|██        | 60/287 [00:33<02:08,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  21%|██▏       | 61/287 [00:34<02:07,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  22%|██▏       | 62/287 [00:34<02:07,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  22%|██▏       | 63/287 [00:35<02:06,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  22%|██▏       | 64/287 [00:36<02:05,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50259,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  23%|██▎       | 65/287 [00:36<02:05,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  23%|██▎       | 66/287 [00:37<02:04,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  23%|██▎       | 67/287 [00:37<02:04,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  24%|██▎       | 68/287 [00:38<02:03,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  24%|██▍       | 69/287 [00:38<02:02,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  24%|██▍       | 70/287 [00:39<02:02,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  25%|██▍       | 71/287 [00:40<02:01,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  25%|██▌       | 72/287 [00:40<02:01,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  25%|██▌       | 73/287 [00:41<02:00,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  26%|██▌       | 74/287 [00:41<01:59,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  26%|██▌       | 75/287 [00:42<01:59,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  26%|██▋       | 76/287 [00:42<01:59,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  27%|██▋       | 77/287 [00:43<01:58,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  27%|██▋       | 78/287 [00:43<01:58,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  28%|██▊       | 79/287 [00:44<01:57,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  28%|██▊       | 80/287 [00:45<01:56,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  28%|██▊       | 81/287 [00:45<01:56,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  29%|██▊       | 82/287 [00:46<01:55,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  29%|██▉       | 83/287 [00:46<01:55,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  29%|██▉       | 84/287 [00:47<01:54,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  30%|██▉       | 85/287 [00:47<01:54,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  30%|██▉       | 86/287 [00:48<01:53,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  30%|███       | 87/287 [00:49<01:53,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  31%|███       | 88/287 [00:49<01:52,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  31%|███       | 89/287 [00:50<01:52,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  31%|███▏      | 90/287 [00:50<01:51,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  32%|███▏      | 91/287 [00:51<01:51,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  32%|███▏      | 92/287 [00:51<01:50,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  32%|███▏      | 93/287 [00:52<01:49,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  33%|███▎      | 94/287 [00:53<01:49,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  33%|███▎      | 95/287 [00:53<01:48,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  33%|███▎      | 96/287 [00:54<01:48,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  34%|███▍      | 97/287 [00:54<01:47,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  34%|███▍      | 98/287 [00:55<01:46,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  34%|███▍      | 99/287 [00:55<01:46,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  35%|███▍      | 100/287 [00:56<01:45,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  35%|███▌      | 101/287 [00:56<01:45,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  36%|███▌      | 102/287 [00:57<01:44,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  36%|███▌      | 103/287 [00:58<01:44,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  36%|███▌      | 104/287 [00:58<01:43,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  37%|███▋      | 105/287 [00:59<01:43,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  37%|███▋      | 106/287 [00:59<01:42,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  37%|███▋      | 107/287 [01:00<01:41,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  38%|███▊      | 108/287 [01:00<01:41,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  38%|███▊      | 109/287 [01:01<01:40,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  38%|███▊      | 110/287 [01:02<01:40,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  39%|███▊      | 111/287 [01:02<01:39,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  39%|███▉      | 112/287 [01:03<01:38,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  39%|███▉      | 113/287 [01:03<01:38,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  40%|███▉      | 114/287 [01:04<01:37,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  40%|████      | 115/287 [01:04<01:37,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  40%|████      | 116/287 [01:05<01:36,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  41%|████      | 117/287 [01:06<01:36,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  41%|████      | 118/287 [01:06<01:35,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  41%|████▏     | 119/287 [01:07<01:34,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  42%|████▏     | 120/287 [01:07<01:34,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  42%|████▏     | 121/287 [01:08<01:33,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  43%|████▎     | 122/287 [01:08<01:33,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  43%|████▎     | 123/287 [01:09<01:32,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  43%|████▎     | 124/287 [01:09<01:32,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  44%|████▎     | 125/287 [01:10<01:31,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  44%|████▍     | 126/287 [01:11<01:31,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  44%|████▍     | 127/287 [01:11<01:30,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  45%|████▍     | 128/287 [01:12<01:30,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  45%|████▍     | 129/287 [01:12<01:29,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  45%|████▌     | 130/287 [01:13<01:28,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  46%|████▌     | 131/287 [01:13<01:28,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  46%|████▌     | 132/287 [01:14<01:27,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  46%|████▋     | 133/287 [01:15<01:26,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  47%|████▋     | 134/287 [01:15<01:26,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  47%|████▋     | 135/287 [01:16<01:25,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  47%|████▋     | 136/287 [01:16<01:25,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  48%|████▊     | 137/287 [01:17<01:24,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  48%|████▊     | 138/287 [01:17<01:24,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  48%|████▊     | 139/287 [01:18<01:23,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  49%|████▉     | 140/287 [01:19<01:23,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  49%|████▉     | 141/287 [01:19<01:22,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  49%|████▉     | 142/287 [01:20<01:22,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  50%|████▉     | 143/287 [01:20<01:21,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  50%|█████     | 144/287 [01:21<01:21,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  51%|█████     | 145/287 [01:21<01:20,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  51%|█████     | 146/287 [01:22<01:20,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  51%|█████     | 147/287 [01:22<01:19,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  52%|█████▏    | 148/287 [01:23<01:18,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  52%|█████▏    | 149/287 [01:24<01:18,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  52%|█████▏    | 150/287 [01:24<01:17,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  53%|█████▎    | 151/287 [01:25<01:16,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  53%|█████▎    | 152/287 [01:25<01:16,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  53%|█████▎    | 153/287 [01:26<01:15,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  54%|█████▎    | 154/287 [01:26<01:15,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  54%|█████▍    | 155/287 [01:27<01:14,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  54%|█████▍    | 156/287 [01:28<01:14,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  55%|█████▍    | 157/287 [01:28<01:13,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  55%|█████▌    | 158/287 [01:29<01:13,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  55%|█████▌    | 159/287 [01:29<01:12,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  56%|█████▌    | 160/287 [01:30<01:11,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  56%|█████▌    | 161/287 [01:30<01:11,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  56%|█████▋    | 162/287 [01:31<01:10,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  57%|█████▋    | 163/287 [01:32<01:10,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  57%|█████▋    | 164/287 [01:32<01:09,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  57%|█████▋    | 165/287 [01:33<01:09,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  58%|█████▊    | 166/287 [01:33<01:08,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  58%|█████▊    | 167/287 [01:34<01:07,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  59%|█████▊    | 168/287 [01:34<01:07,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  59%|█████▉    | 169/287 [01:35<01:06,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  59%|█████▉    | 170/287 [01:36<01:06,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  60%|█████▉    | 171/287 [01:36<01:05,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  60%|█████▉    | 172/287 [01:37<01:05,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  60%|██████    | 173/287 [01:37<01:04,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  61%|██████    | 174/287 [01:38<01:04,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  61%|██████    | 175/287 [01:38<01:03,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  61%|██████▏   | 176/287 [01:39<01:03,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  62%|██████▏   | 177/287 [01:39<01:02,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  62%|██████▏   | 178/287 [01:40<01:01,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  62%|██████▏   | 179/287 [01:41<01:01,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  63%|██████▎   | 180/287 [01:41<01:00,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  63%|██████▎   | 181/287 [01:42<00:59,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  63%|██████▎   | 182/287 [01:42<00:59,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  64%|██████▍   | 183/287 [01:43<00:58,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  64%|██████▍   | 184/287 [01:43<00:58,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  64%|██████▍   | 185/287 [01:44<00:57,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  65%|██████▍   | 186/287 [01:45<00:57,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  65%|██████▌   | 187/287 [01:45<00:56,  1.76it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  66%|██████▌   | 188/287 [01:46<00:56,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  66%|██████▌   | 189/287 [01:46<00:55,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  66%|██████▌   | 190/287 [01:47<00:54,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  67%|██████▋   | 191/287 [01:47<00:54,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  67%|██████▋   | 192/287 [01:48<00:53,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  67%|██████▋   | 193/287 [01:49<00:53,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  68%|██████▊   | 194/287 [01:49<00:52,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  68%|██████▊   | 195/287 [01:50<00:52,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  68%|██████▊   | 196/287 [01:50<00:51,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  69%|██████▊   | 197/287 [01:51<00:50,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  69%|██████▉   | 198/287 [01:51<00:50,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  69%|██████▉   | 199/287 [01:52<00:49,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  70%|██████▉   | 200/287 [01:52<00:49,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  70%|███████   | 201/287 [01:53<00:48,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  70%|███████   | 202/287 [01:54<00:47,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  71%|███████   | 203/287 [01:54<00:47,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  71%|███████   | 204/287 [01:55<00:46,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  71%|███████▏  | 205/287 [01:55<00:46,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  72%|███████▏  | 206/287 [01:56<00:45,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  72%|███████▏  | 207/287 [01:56<00:45,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  72%|███████▏  | 208/287 [01:57<00:44,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  73%|███████▎  | 209/287 [01:58<00:44,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  73%|███████▎  | 210/287 [01:58<00:43,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  74%|███████▎  | 211/287 [01:59<00:42,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  74%|███████▍  | 212/287 [01:59<00:42,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  74%|███████▍  | 213/287 [02:00<00:41,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  75%|███████▍  | 214/287 [02:00<00:41,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  75%|███████▍  | 215/287 [02:01<00:40,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  75%|███████▌  | 216/287 [02:02<00:40,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  76%|███████▌  | 217/287 [02:02<00:39,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  76%|███████▌  | 218/287 [02:03<00:38,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  76%|███████▋  | 219/287 [02:03<00:38,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  77%|███████▋  | 220/287 [02:04<00:37,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  77%|███████▋  | 221/287 [02:04<00:37,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  77%|███████▋  | 222/287 [02:05<00:36,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  78%|███████▊  | 223/287 [02:05<00:36,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  78%|███████▊  | 224/287 [02:06<00:35,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  78%|███████▊  | 225/287 [02:07<00:35,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  79%|███████▊  | 226/287 [02:07<00:34,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  79%|███████▉  | 227/287 [02:08<00:33,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  79%|███████▉  | 228/287 [02:08<00:33,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  80%|███████▉  | 229/287 [02:09<00:32,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  80%|████████  | 230/287 [02:09<00:32,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  80%|████████  | 231/287 [02:10<00:31,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  81%|████████  | 232/287 [02:11<00:30,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  81%|████████  | 233/287 [02:11<00:30,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  82%|████████▏ | 234/287 [02:12<00:29,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  82%|████████▏ | 235/287 [02:12<00:29,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  82%|████████▏ | 236/287 [02:13<00:28,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  83%|████████▎ | 237/287 [02:13<00:28,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  83%|████████▎ | 238/287 [02:14<00:27,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  83%|████████▎ | 239/287 [02:14<00:27,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  84%|████████▎ | 240/287 [02:15<00:26,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  84%|████████▍ | 241/287 [02:16<00:25,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  84%|████████▍ | 242/287 [02:16<00:25,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  85%|████████▍ | 243/287 [02:17<00:24,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  85%|████████▌ | 244/287 [02:17<00:24,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  85%|████████▌ | 245/287 [02:18<00:23,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  86%|████████▌ | 246/287 [02:18<00:23,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  86%|████████▌ | 247/287 [02:19<00:22,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  86%|████████▋ | 248/287 [02:20<00:21,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  87%|████████▋ | 249/287 [02:20<00:21,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  87%|████████▋ | 250/287 [02:21<00:20,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  87%|████████▋ | 251/287 [02:21<00:20,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  88%|████████▊ | 252/287 [02:22<00:19,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  88%|████████▊ | 253/287 [02:22<00:19,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  89%|████████▊ | 254/287 [02:23<00:18,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  89%|████████▉ | 255/287 [02:24<00:18,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  89%|████████▉ | 256/287 [02:24<00:17,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  90%|████████▉ | 257/287 [02:25<00:16,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  90%|████████▉ | 258/287 [02:25<00:16,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  90%|█████████ | 259/287 [02:26<00:15,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  91%|█████████ | 260/287 [02:26<00:15,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  91%|█████████ | 261/287 [02:27<00:14,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  91%|█████████▏| 262/287 [02:27<00:14,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  92%|█████████▏| 263/287 [02:28<00:13,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  92%|█████████▏| 264/287 [02:29<00:12,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  92%|█████████▏| 265/287 [02:29<00:12,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  93%|█████████▎| 266/287 [02:30<00:11,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  93%|█████████▎| 267/287 [02:30<00:11,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  93%|█████████▎| 268/287 [02:31<00:10,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  94%|█████████▎| 269/287 [02:31<00:10,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  94%|█████████▍| 270/287 [02:32<00:09,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  94%|█████████▍| 271/287 [02:33<00:09,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  95%|█████████▍| 272/287 [02:33<00:08,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  95%|█████████▌| 273/287 [02:34<00:07,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  95%|█████████▌| 274/287 [02:34<00:07,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  96%|█████████▌| 275/287 [02:35<00:06,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  96%|█████████▌| 276/287 [02:35<00:06,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  97%|█████████▋| 277/287 [02:36<00:05,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  97%|█████████▋| 278/287 [02:37<00:05,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  97%|█████████▋| 279/287 [02:37<00:04,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  98%|█████████▊| 280/287 [02:38<00:03,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  98%|█████████▊| 281/287 [02:38<00:03,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  98%|█████████▊| 282/287 [02:39<00:02,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  99%|█████████▊| 283/287 [02:39<00:02,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  99%|█████████▉| 284/287 [02:40<00:01,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training:  99%|█████████▉| 285/287 [02:40<00:01,  1.78it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training: 100%|█████████▉| 286/287 [02:41<00:00,  1.77it/s]

32
tensor([[50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        ...,
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259],
        [50258, 50258, 50258,  ..., 50259, 50259, 50259]], device='cuda:0')


Training: 100%|██████████| 287/287 [02:42<00:00,  1.77it/s]
Validation: 100%|██████████| 16/16 [00:06<00:00,  2.54it/s]


--- Epoch #1 finished --- Training cost: 2.725821315619173 / Validation cost: 3.279965400695801





In [48]:
torch.save(model, '/content/model_slogan.pkl')

In [49]:
# Sampling functions with top k and top p from HuggingFace:

import torch.nn.functional as F
from tqdm import trange


def top_k_top_p_filtering(logits, top_k=0, top_p=0.0, filter_value=-float('Inf')):
    """ Filter a distribution of logits using top-k and/or nucleus (top-p) filtering
        Args:
            logits: logits distribution shape (batch size x vocabulary size)
            top_k > 0: keep only top k tokens with highest probability (top-k filtering).
            top_p > 0.0: keep the top tokens with cumulative probability >= top_p (nucleus filtering).
                Nucleus filtering is described in Holtzman et al. (http://arxiv.org/abs/1904.09751)
        From: https://gist.github.com/thomwolf/1a5a29f6962089e871b94cbd09daf317
    """
    top_k = min(top_k, logits.size(-1))  # Safety check
    if top_k > 1:
        # Remove all tokens with a probability less than the last token of the top-k
        indices_to_remove = logits < torch.topk(logits, top_k)[0][..., -1, None]
        logits[indices_to_remove] = filter_value

    if top_p > 0.5:
        sorted_logits, sorted_indices = torch.sort(logits, descending=True)
        cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)

        # Remove tokens with cumulative probability above the threshold
        sorted_indices_to_remove = cumulative_probs > top_p
        # Shift the indices to the right to keep also the first token above the threshold
        sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
        sorted_indices_to_remove[..., 0] = 0

        # scatter sorted tensors to original indexing
        indices_to_remove = sorted_indices_to_remove.scatter(dim=1, index=sorted_indices, src=sorted_indices_to_remove)
        logits[indices_to_remove] = filter_value
    return logits


# From HuggingFace, adapted to work with the context/slogan separation:
def sample_sequence(model, length, context, segments_tokens=None, num_samples=1, temperature=1, top_k=0, top_p=0.0, repetition_penalty=1.0,
                    device='cpu'):
    context = torch.tensor(context, dtype=torch.long, device=device)
    context = context.unsqueeze(0).repeat(num_samples, 1)
    generated = context

    with torch.no_grad():
        for _ in trange(length):

            inputs = {'input_ids': generated}
            if segments_tokens != None:
              inputs['token_type_ids'] = torch.tensor(segments_tokens[:generated.shape[1]]).unsqueeze(0).repeat(num_samples, 1)


            outputs = model(**inputs)  # Note: we could also use 'past' with GPT-2/Transfo-XL/XLNet/CTRL (cached hidden-states)
            next_token_logits = outputs[0][:, -1, :] / (temperature if temperature > 0 else 1.)

            # repetition penalty from CTRL (https://arxiv.org/abs/1909.05858)
            for i in range(num_samples):
                for _ in set(generated[i].tolist()):
                    next_token_logits[i, _] /= repetition_penalty
                
            filtered_logits = top_k_top_p_filtering(next_token_logits, top_k=top_k, top_p=top_p)
            if temperature == 0: # greedy sampling:
                next_token = torch.argmax(filtered_logits, dim=-1).unsqueeze(-1)
            else:
                next_token = torch.multinomial(F.softmax(filtered_logits, dim=-1), num_samples=1)
            generated = torch.cat((generated, next_token), dim=1)
    return generated

In [None]:
context = "Matrimonial"

context_tkn = tokenizer.additional_special_tokens_ids[0]
slogan_tkn = tokenizer.additional_special_tokens_ids[1]

input_ids = [context_tkn] + tokenizer.encode(context)

segments = [slogan_tkn] * 64
segments[:len(input_ids)] = [context_tkn] * len(input_ids)

input_ids += [slogan_tkn]

# Move the model back to the CPU for inference:
model.to(torch.device('cpu'))

# Generate 20 samples of max length 20
generated = sample_sequence(model, length=20, context=input_ids, segments_tokens=segments, num_samples=30)

print('\n\n--- Generated Slogans ---\n')

for g in generated:
  slogan = tokenizer.decode(g.squeeze().tolist())
  slogan = slogan.split('<|endoftext|>')[0].split('<slogan>')[1]
  print(slogan)

 45%|████▌     | 9/20 [00:10<00:14,  1.31s/it]