In [None]:
from google.colab import drive
# Attach Google Drive to the Colab filesystem so files stored there are reachable.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
import pandas as pd
import torch
from torch.nn import functional as F
from torch.utils.data import DataLoader, Dataset
from transformers import AutoTokenizer, ElectraForSequenceClassification, AdamW
from tqdm.notebook import tqdm

In [None]:
# Use the GPU when one is available; otherwise fall back to the CPU so that
# every later `.to(device)` call still works on a CPU-only runtime.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
class DiscordCmdDataset(Dataset):
  """Sentence-classification dataset backed by a CSV file.

  The CSV must contain a ``sentence`` column (used for de-duplication).
  Items are read positionally: column 0 is the text, column 1 is the label.
  The stored label is shifted down by one so that the returned target is
  zero-based (the recorded ``describe()`` output shows labels 1..4).

  Args:
    csv_file: Path (or buffer) accepted by ``pandas.read_csv``.
    tokenizer_name: Hugging Face checkpoint whose tokenizer is loaded.
      NOTE(review): the default is the *small* KoELECTRA checkpoint while the
      notebook loads the *base* model — confirm both share one vocabulary,
      or pass the model's checkpoint name here.
    max_length: Every sequence is truncated/padded to exactly this many tokens.
  """

  def __init__(self, csv_file,
               tokenizer_name="monologg/koelectra-small-v3-discriminator",
               max_length=256):
    frame = pd.read_csv(csv_file, sep=',')
    # Some rows contain NaN values: drop them, then drop repeated sentences.
    # Chained (non in-place) so the frame is built in a single expression.
    self.dataset = frame.dropna(axis=0).drop_duplicates(subset=['sentence'])
    self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
    self.max_length = max_length

    print(self.dataset.describe())

  def __len__(self):
    """Return the number of rows left after cleaning."""
    return len(self.dataset)

  def __getitem__(self, idx):
    """Return ``(input_ids, attention_mask, label)`` for the row at position ``idx``.

    ``input_ids`` and ``attention_mask`` are the first (only) row of the
    tensors returned by the tokenizer; ``label`` is a plain ``int``.
    """
    row = self.dataset.iloc[idx, 0:2].values
    text = str(row[0])
    # int() guards against the label column having been read as float
    # (pandas does that when the raw column contained NaN); float targets
    # would be rejected by CrossEntropyLoss.
    y = int(row[1]) - 1

    inputs = self.tokenizer(
        text,
        return_tensors='pt',
        truncation=True,
        max_length=self.max_length,
        # `padding='max_length'` replaces the deprecated `pad_to_max_length=True`.
        padding='max_length',
        add_special_tokens=True
        )

    # The tokenizer returns a batch of one; strip the batch dimension.
    input_ids = inputs['input_ids'][0]
    attention_mask = inputs['attention_mask'][0]

    return input_ids, attention_mask, y

In [None]:
# Build the training and validation datasets from their CSV files.
# The generator is consumed left to right, so train.csv is still loaded first.
train_dataset, test_dataset = (
    DiscordCmdDataset(csv_path) for csv_path in ("train.csv", "validation.csv")
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/61.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/458 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/263k [00:00<?, ?B/s]

            label
count  272.000000
mean     2.268382
std      1.173835
min      1.000000
25%      1.000000
50%      2.000000
75%      3.000000
max      4.000000
           label
count  61.000000
mean    2.311475
std     1.148056
min     1.000000
25%     1.000000
50%     2.000000
75%     3.000000
max     4.000000


In [None]:
# Load the pretrained KoELECTRA encoder with a 4-way classification head
# and move it to the selected device.
model = ElectraForSequenceClassification.from_pretrained(
    "monologg/koelectra-base-v3-discriminator", num_labels=4
)
model.to(device)

# Smoke test: push one sample through the model to check that it runs.
# `text` holds the token ids (first element returned by the dataset).
text, attention_mask, y = train_dataset[0]
# no_grad: this is only a sanity check, so skip building an autograd graph.
with torch.no_grad():
    sample_output = model(
        text.unsqueeze(0).to(device),
        attention_mask=attention_mask.unsqueeze(0).to(device),
    )
sample_output

config.json:   0%|          | 0.00/467 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/452M [00:00<?, ?B/s]

Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-base-v3-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


SequenceClassifierOutput(loss=None, logits=tensor([[-0.0474,  0.0252, -0.0032,  0.0123]], device='cuda:0',
       grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

In [None]:
# Bare expression: the cell output shows the model's module tree (its repr).
model

ElectraForSequenceClassification(
  (electra): ElectraModel(
    (embeddings): ElectraEmbeddings(
      (word_embeddings): Embedding(35000, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): ElectraEncoder(
      (layer): ModuleList(
        (0-11): 12 x ElectraLayer(
          (attention): ElectraAttention(
            (self): ElectraSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): ElectraSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): L

In [None]:
# Optionally warm-start from a checkpoint saved on Google Drive.
# A missing file is not an error: training then starts from the pretrained
# weights loaded earlier, and the notebook still survives "Run All".
CHECKPOINT_PATH = "/content/drive/MyDrive/model.pt"

try:
    # map_location lets a checkpoint saved on GPU load on whatever `device` is.
    # NOTE(review): torch.load unpickles the file — only load trusted checkpoints.
    checkpoint_state = torch.load(CHECKPOINT_PATH, map_location=device)
except FileNotFoundError:
    print(f"No checkpoint found at {CHECKPOINT_PATH}; keeping the current weights.")
else:
    model.load_state_dict(checkpoint_state)

FileNotFoundError: [Errno 2] No such file or directory: 'model.pt'

In [None]:
# Training hyperparameters: number of passes over the data, samples per batch.
epochs, batch_size = 5, 16

In [None]:
# Optimizer over all model parameters.
optimizer = AdamW(model.parameters(), lr=5e-6)

# Training batches are reshuffled on every pass over the loader.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# The evaluation loader uses the same configured batch size (previously a
# hard-coded 16) and a fixed order, since shuffling evaluation data adds
# nothing and only makes runs harder to compare.
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)



In [None]:
import torch
import torch.nn.functional as F
from tqdm import tqdm

# Training objective: cross-entropy between the classifier logits and the labels.
criterion = torch.nn.CrossEntropyLoss()

# Per-epoch history: mean batch loss and training accuracy.
losses, accuracies = [], []

for epoch in range(epochs):
    # Put the model in training mode and reset the running statistics.
    model.train()
    running_loss, n_correct, n_seen = 0.0, 0, 0

    for input_ids_batch, attention_masks_batch, y_batch in tqdm(train_loader):
        optimizer.zero_grad()
        y_batch = y_batch.to(device)

        # Forward pass; only the logits of the model output are needed.
        logits = model(
            input_ids_batch.to(device),
            attention_mask=attention_masks_batch.to(device),
        ).logits

        # Loss, gradients, and one optimizer update.
        loss = criterion(logits, y_batch)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        # The predicted class is the index of the largest logit in each row.
        predicted = logits.argmax(dim=1)
        n_correct += (predicted == y_batch).sum().item()
        n_seen += y_batch.size(0)

    # Loss is averaged over batches, accuracy over individual samples.
    epoch_loss = running_loss / len(train_loader)
    epoch_accuracy = n_correct / n_seen
    losses.append(epoch_loss)
    accuracies.append(epoch_accuracy)

    print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.4f}")

# All epochs are done.
print("Training finished.")




tensor([[-0.0391,  0.0246, -0.0546,  0.0345],
        [-0.0454,  0.0261,  0.0170,  0.0186],
        [-0.0812, -0.0301, -0.0522,  0.0016],
        [-0.0497,  0.0537,  0.0065,  0.0678],
        [-0.0821, -0.0042,  0.0161, -0.0043],
        [-0.0290,  0.0656,  0.0636,  0.0932],
        [-0.1375,  0.0182, -0.0254,  0.0279],
        [-0.0942,  0.0538,  0.0434,  0.0416],
        [-0.0699,  0.0525, -0.0039,  0.0151],
        [-0.0263, -0.0220, -0.0752,  0.0691],
        [-0.1285,  0.0443,  0.0599,  0.0178],
        [-0.0385, -0.0264,  0.0099, -0.0130],
        [-0.0432,  0.0334,  0.0499,  0.0233],
        [-0.0943,  0.0844,  0.0480, -0.0080],
        [-0.0688, -0.0182,  0.0069,  0.0654],
        [-0.0404, -0.0313, -0.0583, -0.0050]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([2, 0, 2, 0, 0, 1, 1, 3, 2, 0, 1, 0, 1, 0, 0, 2], device='cuda:0')


  6%|▌         | 1/17 [00:01<00:17,  1.10s/it]

tensor([[-0.0718,  0.0108, -0.0030,  0.0340],
        [-0.0914,  0.0187,  0.0053,  0.0558],
        [-0.1045,  0.0137,  0.0769,  0.0239],
        [-0.1911, -0.0372, -0.0216,  0.0352],
        [-0.0862,  0.0125,  0.0799,  0.0556],
        [-0.0785,  0.0107,  0.0420, -0.0577],
        [-0.0813,  0.0572, -0.0137, -0.0126],
        [-0.0056, -0.0201,  0.0086, -0.0156],
        [-0.0335, -0.0182,  0.0118, -0.0115],
        [-0.0602,  0.0685, -0.0662,  0.0135],
        [ 0.0189,  0.0883,  0.0080, -0.0043],
        [-0.0032,  0.0096,  0.0083,  0.0526],
        [ 0.0052,  0.0652,  0.1038, -0.0155],
        [-0.1263,  0.0261,  0.0281,  0.0686],
        [-0.0731,  0.0348, -0.0048,  0.0515],
        [-0.0501, -0.0031,  0.0827,  0.0167]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([1, 0, 1, 1, 0, 2, 0, 0, 3, 2, 3, 0, 3, 3, 3, 1], device='cuda:0')


 12%|█▏        | 2/17 [00:01<00:12,  1.24it/s]

tensor([[-0.0069,  0.0623,  0.0518,  0.0716],
        [-0.0247, -0.0786,  0.0018, -0.0208],
        [ 0.0119, -0.0022, -0.0042,  0.0820],
        [ 0.0267,  0.0193,  0.0182,  0.0222],
        [-0.0668,  0.0075,  0.0360,  0.0087],
        [-0.0902, -0.0023, -0.0269,  0.0353],
        [-0.0538, -0.0510,  0.0095,  0.0099],
        [-0.0838,  0.0227,  0.0534,  0.0498],
        [-0.0484,  0.0134, -0.0274,  0.1071],
        [-0.0278,  0.0151, -0.0530,  0.0405],
        [-0.0202,  0.0908,  0.0496, -0.0104],
        [-0.0354,  0.0478,  0.1305,  0.0685],
        [-0.0412, -0.0230,  0.0322, -0.0247],
        [-0.0333,  0.0008,  0.0341,  0.0113],
        [-0.0437,  0.0685,  0.0434,  0.0240],
        [-0.1046,  0.0582,  0.0325,  0.0552]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([1, 0, 2, 2, 0, 2, 2, 3, 3, 0, 1, 2, 1, 3, 3, 1], device='cuda:0')


 18%|█▊        | 3/17 [00:02<00:10,  1.40it/s]

tensor([[-0.0239,  0.0261,  0.0082,  0.0166],
        [-0.0324,  0.0598,  0.0323,  0.0130],
        [-0.0350, -0.0140,  0.1578,  0.0279],
        [-0.0131, -0.0196, -0.0402,  0.0773],
        [-0.0075,  0.0955,  0.0572,  0.0226],
        [-0.0434, -0.0273, -0.0064,  0.0542],
        [ 0.0091,  0.0286, -0.0426, -0.0114],
        [-0.0304, -0.0077, -0.0383, -0.0282],
        [ 0.0403,  0.0694,  0.0433,  0.0536],
        [-0.0024, -0.0723, -0.0140,  0.0435],
        [-0.0806,  0.0410,  0.0307,  0.0745],
        [-0.0555,  0.0678,  0.0474,  0.0622],
        [ 0.0027,  0.0205,  0.0681, -0.0135],
        [-0.0121,  0.0569,  0.0243, -0.0083],
        [-0.1057, -0.0256,  0.0148,  0.0302],
        [ 0.0050, -0.0542,  0.0208,  0.0267]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([2, 1, 3, 0, 3, 1, 2, 1, 1, 0, 2, 2, 0, 0, 2, 3], device='cuda:0')


 24%|██▎       | 4/17 [00:02<00:08,  1.49it/s]

tensor([[-0.0339, -0.0425,  0.0077, -0.0396],
        [-0.0770, -0.0713,  0.0675, -0.0288],
        [-0.0626,  0.0181,  0.0731,  0.0664],
        [-0.0700,  0.0449, -0.0188,  0.0947],
        [-0.0374,  0.0253,  0.0426,  0.0977],
        [-0.0357,  0.0633,  0.0203, -0.0123],
        [-0.0441,  0.0620, -0.0436, -0.0455],
        [-0.0509,  0.0555, -0.0191, -0.0439],
        [-0.0573,  0.0482,  0.0618, -0.0131],
        [-0.0291,  0.0814,  0.0307, -0.0178],
        [-0.0047, -0.0497, -0.1620, -0.0329],
        [-0.0343,  0.0255,  0.0775,  0.0162],
        [ 0.0099,  0.0213,  0.0584,  0.0310],
        [-0.0329,  0.1331,  0.0387, -0.0073],
        [ 0.0045,  0.0131,  0.0695, -0.0517],
        [-0.0116,  0.0475, -0.0063,  0.1194]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([3, 0, 0, 3, 1, 0, 2, 0, 0, 0, 1, 3, 1, 0, 1, 0], device='cuda:0')


 29%|██▉       | 5/17 [00:03<00:07,  1.55it/s]

tensor([[-0.0723,  0.0148,  0.0612, -0.0062],
        [-0.0321,  0.0464,  0.0231,  0.0507],
        [ 0.0361, -0.0241, -0.0093, -0.0103],
        [-0.0185,  0.0220, -0.0294, -0.0479],
        [-0.0334, -0.0326, -0.0531, -0.0307],
        [-0.1109, -0.0005, -0.0228,  0.0072],
        [-0.0324,  0.0063,  0.0052,  0.0241],
        [-0.0141,  0.0460,  0.1263,  0.0344],
        [ 0.0296, -0.0688,  0.0270,  0.0537],
        [-0.0332, -0.0518,  0.0344, -0.0353],
        [ 0.0128,  0.0469, -0.0110,  0.1011],
        [-0.0699,  0.0470,  0.0095, -0.0481],
        [-0.0344, -0.0227,  0.0404,  0.0094],
        [-0.0404,  0.0478,  0.0759,  0.0291],
        [-0.0386,  0.0758,  0.0683,  0.0297],
        [-0.0458,  0.0174,  0.1528,  0.0234]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([3, 1, 2, 0, 1, 2, 2, 0, 1, 1, 0, 2, 3, 0, 3, 3], device='cuda:0')


 35%|███▌      | 6/17 [00:04<00:06,  1.59it/s]

tensor([[-0.0057,  0.0277,  0.0249,  0.0765],
        [-0.0691,  0.0791,  0.0357,  0.0488],
        [-0.0876,  0.0366, -0.0150, -0.0471],
        [ 0.0114, -0.0043,  0.0677,  0.0480],
        [-0.0253,  0.0216, -0.0100,  0.0172],
        [ 0.0052, -0.0214, -0.0332,  0.0042],
        [-0.0169, -0.0660, -0.0055,  0.0744],
        [-0.0619, -0.0145, -0.0559, -0.0380],
        [-0.0417,  0.0111, -0.0720, -0.0486],
        [-0.0247, -0.0515, -0.0027,  0.1126],
        [-0.0595,  0.0729,  0.0332, -0.1062],
        [-0.0124, -0.0190, -0.0173,  0.0139],
        [-0.0646, -0.0217, -0.0340,  0.0186],
        [-0.0398, -0.0131, -0.0222,  0.0124],
        [-0.0066,  0.0364,  0.0245,  0.0396],
        [-0.0727,  0.0485, -0.0189,  0.0127]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([3, 2, 0, 0, 2, 1, 3, 1, 2, 0, 2, 1, 0, 0, 2, 0], device='cuda:0')


 41%|████      | 7/17 [00:04<00:06,  1.61it/s]

tensor([[-0.0291,  0.0345,  0.0527, -0.0182],
        [-0.0416,  0.0211,  0.0709,  0.0375],
        [-0.0807,  0.0023,  0.0597,  0.0291],
        [ 0.0352,  0.0415,  0.0341,  0.0374],
        [-0.0246, -0.0008, -0.0450,  0.0060],
        [-0.0425,  0.0399,  0.0200,  0.0341],
        [-0.0337,  0.0801,  0.0310,  0.0704],
        [-0.0443,  0.0221, -0.0175,  0.0042],
        [-0.0614,  0.0354,  0.0166, -0.0123],
        [-0.0447, -0.0329, -0.0488, -0.0189],
        [-0.0526,  0.0108, -0.0234,  0.0536],
        [-0.0220,  0.0339,  0.0105,  0.0054],
        [ 0.0177,  0.1033,  0.0042,  0.0250],
        [ 0.0023,  0.0173,  0.0513, -0.0339],
        [-0.0230,  0.0095, -0.0475, -0.0229],
        [-0.0768,  0.0433,  0.1537,  0.0529]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([0, 3, 0, 1, 3, 2, 1, 1, 3, 3, 1, 3, 2, 2, 2, 3], device='cuda:0')


 47%|████▋     | 8/17 [00:05<00:05,  1.63it/s]

tensor([[-0.0818,  0.0318,  0.0876,  0.0268],
        [ 0.0115,  0.0266,  0.0458, -0.0096],
        [ 0.0012,  0.0604, -0.0503, -0.0082],
        [-0.0483, -0.0205,  0.0673, -0.0821],
        [-0.0710,  0.0816, -0.0146,  0.0248],
        [-0.0542,  0.0251, -0.0118, -0.0619],
        [-0.0338,  0.0272,  0.0262,  0.0279],
        [ 0.0031,  0.1131,  0.0339,  0.0007],
        [-0.0272, -0.0799, -0.0762, -0.0492],
        [ 0.0869, -0.0404, -0.0059,  0.0305],
        [-0.0681, -0.0355,  0.0187,  0.0286],
        [-0.0428,  0.0343, -0.0593, -0.0147],
        [ 0.0376,  0.0511,  0.0136, -0.0257],
        [-0.0275,  0.0329, -0.0284, -0.0044],
        [-0.0116, -0.0749,  0.0381,  0.0163],
        [ 0.0443,  0.0124,  0.0848,  0.0811]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([1, 0, 0, 3, 0, 2, 3, 0, 2, 1, 3, 0, 0, 0, 3, 0], device='cuda:0')


 53%|█████▎    | 9/17 [00:05<00:04,  1.64it/s]

tensor([[-5.8814e-02,  4.3870e-02,  4.6950e-02, -1.0159e-03],
        [-5.2669e-02,  2.9454e-02,  2.1719e-02,  5.3595e-02],
        [-9.7649e-02, -5.4330e-03, -6.3248e-02, -1.4058e-02],
        [-2.3368e-02, -3.0865e-02, -1.2386e-02,  7.2111e-02],
        [-1.2273e-01, -9.7905e-03, -1.9156e-03,  2.5486e-02],
        [ 1.4209e-02,  2.9185e-02,  2.0948e-02, -5.5220e-02],
        [ 4.5752e-02,  5.3943e-02,  5.1423e-02, -7.2005e-05],
        [ 4.6834e-02, -2.0928e-02, -3.6967e-02, -6.5875e-02],
        [-4.3415e-02,  1.0965e-02,  1.7998e-02,  3.3799e-02],
        [-2.3442e-02, -2.7524e-02, -4.0819e-02, -8.3176e-03],
        [-1.0774e-01,  6.9009e-02, -2.1869e-03, -1.5329e-02],
        [-9.5957e-02, -3.6210e-02, -5.4441e-02, -1.4903e-02],
        [-5.4756e-03,  5.2149e-02,  1.1349e-02,  3.3707e-02],
        [-4.5916e-02,  5.4845e-02,  1.0512e-02,  3.1490e-02],
        [-4.2407e-02,  2.8312e-02,  1.1239e-01, -4.4308e-02],
        [-4.5394e-03,  6.5682e-03,  8.1087e-02,  1.7355e-03]], device=

 59%|█████▉    | 10/17 [00:06<00:04,  1.65it/s]

tensor([[ 0.0303,  0.0389, -0.0477, -0.0184],
        [-0.0434, -0.0405, -0.0174,  0.0386],
        [ 0.0126, -0.0796,  0.0306, -0.0094],
        [-0.0411,  0.0003,  0.0196, -0.0316],
        [-0.0466, -0.0091, -0.0393, -0.0249],
        [-0.0163,  0.0067,  0.0966,  0.0658],
        [ 0.0019,  0.0238,  0.0262, -0.0155],
        [ 0.0314, -0.0228, -0.0618, -0.0237],
        [-0.0087,  0.0380,  0.0792,  0.0116],
        [-0.0207,  0.0846,  0.0423,  0.0103],
        [-0.1252, -0.0367,  0.1005,  0.0544],
        [-0.0648,  0.0151, -0.0042, -0.0205],
        [-0.0400,  0.0308,  0.0539,  0.0584],
        [ 0.0224,  0.0888,  0.0081,  0.0128],
        [-0.0897,  0.0345, -0.0168, -0.0091],
        [-0.0612, -0.0131, -0.1210,  0.0163]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([0, 3, 1, 1, 2, 3, 0, 0, 3, 0, 0, 0, 3, 1, 0, 2], device='cuda:0')


 65%|██████▍   | 11/17 [00:07<00:03,  1.65it/s]

tensor([[-0.0261,  0.0468, -0.0158,  0.0312],
        [ 0.0155, -0.0261,  0.0220,  0.0324],
        [ 0.0200, -0.0446, -0.0548,  0.0315],
        [ 0.0002, -0.0819,  0.0073,  0.0071],
        [-0.0376, -0.0068, -0.0277, -0.0125],
        [ 0.0017,  0.0518, -0.0430,  0.0195],
        [-0.0045,  0.0167,  0.0870,  0.0180],
        [-0.0264,  0.0738,  0.0424, -0.0119],
        [-0.0272,  0.0436,  0.0396, -0.0152],
        [-0.0111,  0.0362, -0.0602,  0.0061],
        [ 0.0161,  0.0046,  0.0617, -0.0088],
        [-0.1101, -0.0015,  0.0047, -0.0352],
        [-0.0406,  0.0012, -0.0084, -0.0189],
        [-0.0387, -0.0831, -0.0267, -0.0459],
        [-0.0043, -0.0055,  0.0278,  0.0348],
        [-0.0415, -0.0014,  0.0204, -0.0734]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([1, 0, 0, 0, 0, 0, 3, 1, 2, 1, 0, 1, 1, 0, 2, 3], device='cuda:0')


 71%|███████   | 12/17 [00:07<00:03,  1.65it/s]

tensor([[-0.0824, -0.0063, -0.0169,  0.0012],
        [-0.0292,  0.0569, -0.0136, -0.0596],
        [ 0.0232, -0.0623,  0.0185, -0.0040],
        [ 0.0067,  0.0238,  0.0665,  0.0223],
        [ 0.0552,  0.0420,  0.0428, -0.0030],
        [ 0.0108, -0.0023,  0.0163, -0.0541],
        [-0.0193,  0.0066,  0.0452,  0.0171],
        [-0.0651,  0.0863,  0.0090,  0.0116],
        [-0.0501, -0.0046, -0.0258,  0.0727],
        [-0.0497,  0.0382, -0.0683,  0.0051],
        [-0.0332,  0.0083,  0.0387,  0.0335],
        [-0.0368, -0.0811, -0.0623, -0.0007],
        [ 0.0174, -0.0049,  0.0355,  0.0051],
        [-0.0017,  0.0018,  0.0117, -0.0009],
        [-0.0124,  0.0416,  0.0776, -0.0825],
        [ 0.0051,  0.0520,  0.0088, -0.0366]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([1, 1, 1, 0, 0, 0, 0, 2, 2, 2, 3, 0, 3, 1, 0, 1], device='cuda:0')


 76%|███████▋  | 13/17 [00:08<00:02,  1.65it/s]

tensor([[ 0.0272,  0.0051, -0.0201, -0.0504],
        [ 0.0023, -0.0566, -0.0545,  0.0257],
        [-0.0864,  0.0411, -0.0206,  0.0180],
        [-0.0620,  0.0628,  0.0606,  0.0722],
        [-0.0205,  0.0498,  0.1419, -0.0423],
        [ 0.0262,  0.0439,  0.0634,  0.0661],
        [ 0.0109,  0.0590,  0.0368,  0.0175],
        [ 0.0321,  0.0463,  0.0364, -0.0311],
        [ 0.0481,  0.0377,  0.0039, -0.0417],
        [ 0.0034,  0.0545,  0.0568,  0.0055],
        [-0.0290,  0.0688, -0.0009, -0.0286],
        [ 0.0388,  0.0586,  0.0145,  0.0022],
        [ 0.0735,  0.0352, -0.0840, -0.0723],
        [ 0.0036, -0.0026, -0.0212, -0.0301],
        [-0.0392,  0.0253, -0.0578,  0.0208],
        [-0.0021,  0.0314,  0.0394, -0.0886]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([1, 3, 1, 3, 0, 2, 2, 0, 1, 0, 1, 1, 1, 0, 0, 0], device='cuda:0')


 82%|████████▏ | 14/17 [00:08<00:01,  1.66it/s]

tensor([[ 0.0429,  0.0516, -0.0167, -0.0321],
        [ 0.0034,  0.0447, -0.0294,  0.0007],
        [ 0.0277,  0.0512,  0.0434,  0.0386],
        [-0.0070,  0.0529,  0.0252, -0.0071],
        [-0.0273,  0.0606, -0.0404,  0.0692],
        [-0.0065,  0.0217,  0.0242,  0.0026],
        [ 0.0081,  0.0363,  0.0159, -0.0428],
        [ 0.0474, -0.0025, -0.0215, -0.0312],
        [-0.0158, -0.0529,  0.0418,  0.0138],
        [ 0.0499,  0.0734,  0.0253, -0.0016],
        [-0.0536,  0.0399,  0.0068,  0.0238],
        [ 0.0134,  0.0321, -0.0119,  0.0398],
        [-0.0075,  0.0174,  0.0030, -0.0719],
        [ 0.0696,  0.0480,  0.0120, -0.0322],
        [ 0.0869, -0.0568,  0.0407,  0.0087],
        [-0.0472,  0.0161, -0.0285, -0.0900]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([0, 3, 2, 0, 0, 1, 0, 3, 0, 0, 2, 3, 0, 3, 3, 0], device='cuda:0')


 88%|████████▊ | 15/17 [00:09<00:01,  1.66it/s]

tensor([[ 0.0205, -0.0196,  0.0423, -0.0116],
        [-0.0034, -0.0618,  0.0513, -0.0406],
        [ 0.0198, -0.0103, -0.0015,  0.0073],
        [ 0.0091,  0.0270,  0.0627,  0.0206],
        [ 0.0065, -0.0726,  0.0356,  0.0607],
        [ 0.1081,  0.0159,  0.0338, -0.0121],
        [ 0.0694, -0.0286, -0.0401, -0.0458],
        [-0.0848,  0.0163, -0.0552, -0.0177],
        [-0.0485,  0.0408, -0.0125, -0.0023],
        [-0.0400, -0.0419, -0.0080,  0.0246],
        [-0.0058, -0.0282, -0.0365, -0.0599],
        [ 0.0476,  0.0807, -0.0432, -0.0498],
        [ 0.0548, -0.0218,  0.0227, -0.0872],
        [ 0.0474,  0.0221,  0.0404, -0.0550],
        [-0.0100,  0.0436, -0.0021, -0.0102],
        [ 0.0186,  0.0666,  0.0002, -0.0874]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([3, 3, 3, 3, 0, 3, 1, 0, 0, 0, 0, 0, 1, 0, 3, 1], device='cuda:0')


 94%|█████████▍| 16/17 [00:10<00:00,  1.65it/s]

tensor([[-0.0082, -0.0602,  0.0427,  0.0500],
        [ 0.0157,  0.0049,  0.0648,  0.0292],
        [ 0.0795,  0.0145,  0.0324, -0.0120],
        [-0.0142, -0.0117, -0.0305, -0.0111],
        [ 0.0424,  0.0046, -0.0341,  0.0123],
        [ 0.0112, -0.0593, -0.0488, -0.0331],
        [ 0.0795, -0.0527,  0.0180,  0.0198],
        [ 0.1070,  0.0102, -0.0144, -0.0227],
        [-0.0432, -0.0451,  0.0729,  0.0273],
        [-0.0829,  0.0525, -0.0882,  0.0477],
        [ 0.0972,  0.0165, -0.0262,  0.0166],
        [ 0.0227, -0.0369, -0.0066,  0.0221],
        [ 0.0454,  0.0302, -0.0246,  0.0091],
        [ 0.0165, -0.0907,  0.0410, -0.0567],
        [-0.0045, -0.1021, -0.0364, -0.0058],
        [-0.0102,  0.0289, -0.0128, -0.0468]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([3, 3, 1, 3, 2, 0, 3, 0, 3, 3, 0, 0, 0, 2, 2, 1], device='cuda:0')


100%|██████████| 17/17 [00:10<00:00,  1.58it/s]


Epoch 1/5, Loss: 1.3902, Accuracy: 0.1985


  0%|          | 0/17 [00:00<?, ?it/s]

tensor([[ 0.0448,  0.0287, -0.0687, -0.0491],
        [ 0.0140,  0.0308, -0.0199,  0.0150],
        [ 0.0247, -0.0366, -0.0663, -0.0015],
        [ 0.0830,  0.0781,  0.0791,  0.0284],
        [ 0.0763,  0.1627, -0.0813,  0.0071],
        [ 0.0175, -0.0532, -0.0261, -0.0294],
        [ 0.0133,  0.0365, -0.0328, -0.0081],
        [ 0.0314, -0.0717, -0.0184, -0.0235],
        [ 0.0712, -0.0110,  0.0280,  0.0164],
        [ 0.0159,  0.0143, -0.0896, -0.0149],
        [-0.0209,  0.0131,  0.0379,  0.0561],
        [ 0.1293, -0.0042,  0.0540, -0.0506],
        [ 0.0827, -0.0368, -0.0149,  0.0375],
        [ 0.1070, -0.0381, -0.0392,  0.0302],
        [ 0.0238, -0.0198, -0.0246, -0.0066],
        [ 0.0161, -0.0234, -0.0470, -0.0330]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([2, 1, 2, 0, 2, 0, 0, 1, 2, 1, 3, 0, 3, 0, 1, 0], device='cuda:0')


  6%|▌         | 1/17 [00:00<00:09,  1.66it/s]

tensor([[ 0.0441, -0.0067, -0.0227, -0.0305],
        [ 0.0617,  0.0427, -0.0295, -0.1037],
        [ 0.0247, -0.0209,  0.0357,  0.1067],
        [-0.0423, -0.0193, -0.0284, -0.0233],
        [ 0.0414,  0.0271, -0.0412, -0.0366],
        [-0.0082,  0.0359, -0.0225, -0.0145],
        [ 0.0134, -0.0322,  0.0161, -0.0647],
        [ 0.0247, -0.0566, -0.0255,  0.0041],
        [ 0.0248, -0.0367, -0.0273, -0.0144],
        [ 0.0375,  0.0070, -0.0434,  0.0724],
        [ 0.0501, -0.0209, -0.0597, -0.0010],
        [ 0.1013,  0.0470,  0.0446,  0.0113],
        [ 0.1300, -0.0589,  0.0606, -0.0533],
        [ 0.0381,  0.0513,  0.0449, -0.0004],
        [-0.0007, -0.0084, -0.0179, -0.0424],
        [ 0.0040, -0.0475, -0.0489, -0.0415]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([0, 1, 3, 3, 2, 1, 1, 0, 0, 3, 0, 0, 0, 1, 0, 0], device='cuda:0')


 12%|█▏        | 2/17 [00:01<00:09,  1.66it/s]

tensor([[-0.0111, -0.0331,  0.0263, -0.0607],
        [ 0.0476, -0.0256, -0.1018, -0.0328],
        [ 0.0214,  0.0463, -0.0290, -0.0186],
        [ 0.0380,  0.0881, -0.0073,  0.0434],
        [ 0.0286,  0.0382,  0.0484, -0.0750],
        [ 0.0204,  0.0119,  0.0049, -0.0271],
        [ 0.0294, -0.0655,  0.0401, -0.0536],
        [ 0.0191, -0.0365, -0.0433, -0.0886],
        [ 0.0049,  0.0019,  0.0730,  0.0192],
        [-0.0663,  0.0104, -0.0230,  0.0035],
        [-0.0174, -0.0365, -0.0156, -0.0251],
        [ 0.0271, -0.0104, -0.0431, -0.0390],
        [ 0.0333, -0.0438,  0.0292,  0.0155],
        [ 0.0172, -0.0428, -0.0146,  0.0352],
        [ 0.0194, -0.0150, -0.0522, -0.0540],
        [ 0.0450,  0.0298, -0.0353, -0.0462]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([2, 0, 2, 3, 1, 0, 2, 0, 3, 3, 3, 2, 3, 3, 1, 2], device='cuda:0')


 18%|█▊        | 3/17 [00:01<00:08,  1.65it/s]

tensor([[-0.0202, -0.0073, -0.0498,  0.0934],
        [ 0.0517, -0.0498, -0.0305, -0.0603],
        [ 0.0196, -0.0062, -0.0233, -0.0136],
        [ 0.0215,  0.0570, -0.0394,  0.0240],
        [ 0.0471,  0.0959, -0.1116, -0.0282],
        [-0.0162, -0.0091, -0.0683, -0.0251],
        [ 0.0501,  0.0166, -0.0099,  0.0279],
        [ 0.0410,  0.0096,  0.0150,  0.0354],
        [ 0.0591, -0.0226, -0.0485, -0.0039],
        [ 0.0235, -0.0122, -0.0514,  0.0135],
        [ 0.0642,  0.0353,  0.0006, -0.0389],
        [ 0.0631, -0.0635, -0.0138,  0.0362],
        [ 0.0294,  0.0487,  0.0556,  0.0495],
        [ 0.0546, -0.0660, -0.0397, -0.0269],
        [ 0.0432,  0.0957,  0.0435,  0.0015],
        [ 0.1129, -0.0569,  0.0101, -0.0254]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([3, 0, 1, 1, 1, 2, 0, 1, 0, 0, 2, 0, 3, 1, 1, 0], device='cuda:0')


 24%|██▎       | 4/17 [00:02<00:07,  1.64it/s]

tensor([[ 0.0948, -0.0446,  0.0105, -0.0699],
        [ 0.1541,  0.0206, -0.0444, -0.0715],
        [ 0.0502, -0.0372, -0.0175, -0.0181],
        [ 0.0573,  0.0309, -0.0750, -0.0181],
        [ 0.0786, -0.0244, -0.0193, -0.0105],
        [ 0.0386, -0.0109, -0.0471,  0.0388],
        [ 0.1175,  0.0029, -0.0760, -0.0459],
        [ 0.0380,  0.0698, -0.0462,  0.0206],
        [ 0.0552,  0.0716,  0.0226,  0.0092],
        [ 0.0379, -0.0013,  0.0103, -0.0011],
        [ 0.0394, -0.0311, -0.0122, -0.0023],
        [ 0.0121,  0.0619, -0.0459, -0.0113],
        [ 0.0416,  0.0057, -0.0865, -0.0952],
        [ 0.0388,  0.0260, -0.0031,  0.0368],
        [ 0.0299,  0.0753,  0.0158,  0.0209],
        [ 0.1027,  0.0303, -0.0511, -0.0354]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([0, 1, 1, 2, 3, 0, 1, 1, 2, 0, 3, 0, 1, 1, 3, 0], device='cuda:0')


 29%|██▉       | 5/17 [00:03<00:07,  1.64it/s]

tensor([[ 0.0394, -0.0028, -0.0445, -0.0176],
        [ 0.0028,  0.0486, -0.0519, -0.0259],
        [-0.0081, -0.0030, -0.0303, -0.0280],
        [ 0.0437,  0.0175, -0.0258,  0.0513],
        [ 0.1302, -0.0381, -0.0504, -0.0659],
        [ 0.1159,  0.0146, -0.0218, -0.1183],
        [ 0.0474,  0.0522, -0.0463, -0.0555],
        [ 0.0273,  0.0290, -0.0251, -0.0149],
        [ 0.0466,  0.0878,  0.0270, -0.0008],
        [ 0.0374,  0.1087, -0.0583, -0.0434],
        [ 0.0274,  0.0323, -0.0429,  0.0099],
        [ 0.0353, -0.0806, -0.1119,  0.0021],
        [ 0.0874,  0.0355, -0.1227,  0.0018],
        [ 0.0014, -0.0262, -0.0283, -0.0355],
        [ 0.0674,  0.0008,  0.0230, -0.0062],
        [ 0.0677, -0.0423, -0.0547,  0.0393]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([3, 3, 1, 3, 0, 0, 3, 1, 2, 1, 0, 3, 0, 2, 2, 3], device='cuda:0')


 35%|███▌      | 6/17 [00:03<00:06,  1.64it/s]

tensor([[ 4.2658e-02, -5.1471e-02,  4.1599e-03, -1.0906e-02],
        [ 5.0667e-02, -1.5031e-05,  1.6760e-02,  4.8357e-02],
        [ 9.7013e-02,  1.0989e-02, -1.2481e-02, -7.4245e-02],
        [ 1.4007e-01,  9.7098e-02, -1.9962e-02, -8.1594e-02],
        [ 3.6052e-02,  3.3286e-02, -8.7163e-02, -5.1958e-02],
        [ 1.1967e-01,  3.4834e-02, -4.6795e-02, -3.8583e-02],
        [ 7.4009e-02,  1.2376e-02, -4.3845e-02, -2.6684e-02],
        [ 7.7535e-02,  3.2314e-02, -1.2415e-02, -6.0025e-02],
        [ 4.7840e-02, -4.8460e-03, -8.7109e-02, -2.4793e-02],
        [ 8.7673e-03, -2.6450e-02, -5.8235e-02, -1.8820e-02],
        [ 2.0336e-02,  5.9699e-02, -1.6111e-02, -3.7392e-02],
        [ 1.0281e-01,  7.5327e-02, -2.7034e-02, -3.9609e-02],
        [ 2.4519e-02,  3.4213e-02, -4.5051e-02, -4.6479e-02],
        [ 3.4979e-02,  9.6574e-02, -8.0954e-02, -2.9485e-02],
        [ 8.7447e-03,  4.3746e-02, -2.6248e-02, -6.1237e-02],
        [ 8.9947e-02, -2.8023e-03,  2.6381e-03, -7.5228e-04]], device=

 41%|████      | 7/17 [00:04<00:06,  1.63it/s]

tensor([[ 0.0699,  0.0715, -0.0506, -0.0423],
        [ 0.0519,  0.0170, -0.0159,  0.0394],
        [ 0.0563,  0.0259, -0.0121, -0.0319],
        [ 0.0909, -0.0145, -0.1271, -0.0459],
        [ 0.0359, -0.0272,  0.0275, -0.0358],
        [ 0.0475, -0.0075,  0.0083,  0.0434],
        [ 0.0839,  0.0217, -0.0957, -0.0523],
        [ 0.0111, -0.0021, -0.0390,  0.0055],
        [ 0.1226,  0.0481, -0.0354,  0.0283],
        [ 0.0907,  0.0030, -0.0262, -0.0304],
        [-0.0091,  0.0128, -0.0123, -0.0962],
        [ 0.1061, -0.0514, -0.0507, -0.0931],
        [ 0.1435,  0.0169,  0.0228, -0.0909],
        [ 0.0977,  0.0567, -0.0493, -0.0352],
        [ 0.0597,  0.0277, -0.0710, -0.0364],
        [ 0.0502, -0.0299,  0.0342, -0.0345]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([2, 3, 2, 0, 0, 3, 0, 0, 2, 0, 1, 0, 0, 2, 0, 3], device='cuda:0')


 47%|████▋     | 8/17 [00:04<00:05,  1.64it/s]

tensor([[-0.0166,  0.0690,  0.1106, -0.0619],
        [ 0.0507,  0.0309,  0.0075, -0.1085],
        [ 0.0352,  0.0119, -0.0858, -0.0206],
        [ 0.0206, -0.0051, -0.0351, -0.0008],
        [ 0.0311, -0.0269, -0.0489,  0.0421],
        [ 0.0818,  0.0107, -0.0311, -0.0705],
        [ 0.0620, -0.0120, -0.0252, -0.0295],
        [ 0.1141, -0.0075,  0.0079, -0.0409],
        [ 0.1321,  0.0700,  0.0008, -0.0354],
        [-0.0095, -0.0258, -0.0508,  0.0281],
        [ 0.0976, -0.0036, -0.0126, -0.0331],
        [ 0.0416,  0.0177, -0.0064, -0.0916],
        [ 0.1570, -0.0573, -0.0713, -0.0152],
        [ 0.0654,  0.0731, -0.0111, -0.0568],
        [ 0.0031,  0.0260, -0.0153, -0.0621],
        [ 0.1292, -0.0107, -0.0035,  0.0507]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([1, 1, 1, 3, 3, 0, 0, 0, 0, 3, 1, 1, 3, 2, 1, 3], device='cuda:0')


 53%|█████▎    | 9/17 [00:05<00:04,  1.64it/s]

tensor([[ 0.1042, -0.0090, -0.0231, -0.0197],
        [ 0.0648, -0.0245, -0.0744,  0.0290],
        [ 0.0039,  0.0023, -0.1063, -0.0583],
        [-0.0240,  0.0024, -0.0787, -0.0755],
        [ 0.1476,  0.0224, -0.0583,  0.0408],
        [ 0.0243, -0.0181, -0.0290,  0.0598],
        [ 0.1289,  0.0389, -0.0257, -0.0768],
        [ 0.1314,  0.0068, -0.0179,  0.0085],
        [ 0.0244, -0.0869, -0.0810,  0.0183],
        [ 0.0329, -0.0132,  0.0169, -0.0104],
        [ 0.1147, -0.0711, -0.1233, -0.0331],
        [ 0.0463, -0.0298, -0.0134, -0.0592],
        [ 0.1570,  0.0178, -0.0732, -0.0701],
        [ 0.0747,  0.0529,  0.0163,  0.0806],
        [ 0.0681,  0.0256, -0.0357, -0.0256],
        [ 0.1155,  0.0490, -0.0110, -0.0238]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([0, 3, 2, 2, 2, 3, 0, 0, 2, 1, 0, 0, 0, 3, 0, 1], device='cuda:0')


 59%|█████▉    | 10/17 [00:06<00:04,  1.63it/s]

tensor([[ 0.1641, -0.0156, -0.0138,  0.0050],
        [ 0.1438, -0.0409, -0.0008, -0.0654],
        [ 0.1763, -0.0624, -0.0101, -0.0719],
        [ 0.1174, -0.0078, -0.0835, -0.0837],
        [ 0.0417, -0.0362, -0.0831,  0.0032],
        [ 0.0976, -0.0397, -0.0349, -0.0389],
        [ 0.1288,  0.0442, -0.0537,  0.0416],
        [ 0.0492,  0.0218,  0.0312, -0.0653],
        [ 0.0205, -0.0491, -0.0563, -0.0290],
        [ 0.1247,  0.0075, -0.0807,  0.0036],
        [ 0.1145, -0.0753,  0.0160,  0.0417],
        [ 0.0602, -0.0047, -0.0402, -0.0403],
        [-0.0144,  0.0065,  0.0282,  0.0003],
        [ 0.0273, -0.0239, -0.0769,  0.0461],
        [ 0.0232,  0.0267, -0.0613, -0.0824],
        [ 0.0980,  0.0055, -0.0394, -0.0838]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([0, 0, 0, 0, 0, 0, 3, 2, 3, 3, 3, 2, 3, 3, 2, 1], device='cuda:0')


 65%|██████▍   | 11/17 [00:06<00:03,  1.64it/s]

tensor([[ 4.4644e-02,  6.5807e-02, -1.1058e-01,  1.7126e-02],
        [ 1.0851e-01,  3.1087e-02, -4.2019e-02, -5.9279e-02],
        [ 1.0667e-01,  5.9435e-03, -5.0027e-02, -5.3250e-02],
        [ 5.9845e-02,  3.1582e-02, -3.3949e-02, -2.5975e-02],
        [ 1.5795e-01,  2.9359e-02, -4.5475e-02,  5.2166e-02],
        [ 2.1555e-01, -4.6777e-02,  4.2644e-02, -5.4720e-02],
        [ 1.2113e-01,  3.4746e-02,  4.2828e-03, -1.5010e-02],
        [ 6.4471e-02,  5.3649e-02, -1.2360e-02, -1.1249e-01],
        [ 1.0796e-01, -4.5551e-02, -6.0410e-02, -1.1676e-01],
        [ 6.9867e-02,  9.3701e-02,  2.1319e-04, -6.6606e-03],
        [ 8.6303e-02,  1.8393e-02, -9.1531e-02, -5.1986e-03],
        [ 1.4106e-02, -2.9887e-02, -5.1353e-02, -4.7709e-02],
        [ 9.3276e-02, -1.0882e-01, -5.9051e-02,  3.7284e-02],
        [ 3.9228e-02, -1.7085e-02, -3.5712e-02, -5.1704e-02],
        [ 4.6980e-02,  4.0722e-03, -1.8621e-02, -3.8006e-02],
        [ 8.2457e-02, -4.9792e-02, -9.3762e-02, -1.7976e-02]], device=

 71%|███████   | 12/17 [00:07<00:03,  1.64it/s]

tensor([[ 0.0560, -0.0378, -0.1150, -0.0876],
        [ 0.0302, -0.0878, -0.1021,  0.0418],
        [ 0.0546,  0.0730,  0.0011,  0.0376],
        [ 0.1031, -0.0336, -0.0917, -0.1270],
        [ 0.1289,  0.0513, -0.0573, -0.0180],
        [ 0.0711, -0.0440, -0.0444, -0.0355],
        [ 0.1003,  0.0361, -0.0150, -0.0481],
        [ 0.1854,  0.0362, -0.1231, -0.0955],
        [ 0.1480,  0.0023, -0.1656, -0.0319],
        [ 0.0625,  0.0129, -0.0207, -0.1217],
        [ 0.1505, -0.0165, -0.0181, -0.0850],
        [ 0.1290, -0.0070, -0.0273, -0.0163],
        [ 0.1244, -0.0017, -0.0742, -0.0873],
        [ 0.0530,  0.1008, -0.0246, -0.0423],
        [ 0.0423,  0.1011, -0.0289, -0.0791],
        [ 0.1187,  0.0191, -0.0880, -0.0842]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([3, 3, 2, 0, 2, 0, 2, 0, 0, 1, 0, 0, 0, 0, 1, 0], device='cuda:0')


 76%|███████▋  | 13/17 [00:07<00:02,  1.63it/s]

tensor([[ 0.0755,  0.0043, -0.0528, -0.0904],
        [ 0.1315,  0.0046,  0.0064, -0.0841],
        [ 0.1369, -0.0324,  0.0097,  0.0176],
        [ 0.0777,  0.0566, -0.0433, -0.0951],
        [ 0.0804,  0.0912, -0.1161, -0.0778],
        [ 0.1201, -0.0258, -0.0856, -0.0382],
        [ 0.1543, -0.0458, -0.0086, -0.0320],
        [ 0.0705,  0.0469, -0.0171, -0.0280],
        [ 0.1045,  0.0100, -0.0906, -0.0107],
        [ 0.0218, -0.0614, -0.0989,  0.0992],
        [ 0.1285, -0.0657, -0.0720, -0.0623],
        [-0.0093, -0.0297, -0.0765, -0.0793],
        [ 0.0636,  0.0271, -0.0278,  0.0331],
        [ 0.1494,  0.0407, -0.0132,  0.0110],
        [ 0.0880,  0.0765, -0.0424, -0.0995],
        [ 0.1300, -0.0598, -0.0653, -0.0317]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([2, 0, 3, 2, 1, 0, 0, 3, 0, 3, 0, 1, 3, 1, 2, 0], device='cuda:0')


 82%|████████▏ | 14/17 [00:08<00:01,  1.62it/s]

tensor([[ 0.0570, -0.0184, -0.1052, -0.1165],
        [-0.0303, -0.0308, -0.1391,  0.0378],
        [ 0.1379, -0.0423, -0.1422, -0.0456],
        [ 0.0829, -0.0657, -0.0098, -0.0297],
        [ 0.0973,  0.0593,  0.0277, -0.0726],
        [ 0.1884, -0.0999, -0.0627, -0.0019],
        [-0.0144,  0.0324, -0.0446, -0.0645],
        [-0.0037, -0.0353, -0.0322,  0.0922],
        [ 0.1040,  0.0471, -0.0758, -0.0333],
        [ 0.1056, -0.0221, -0.1053, -0.0260],
        [ 0.0213,  0.0087, -0.0481,  0.0121],
        [ 0.1206,  0.0168, -0.0622, -0.0722],
        [ 0.0733, -0.0141,  0.0245, -0.0172],
        [ 0.0537, -0.0519, -0.0503,  0.0313],
        [ 0.1426, -0.0687, -0.0525, -0.0444],
        [ 0.0520,  0.0749,  0.0166, -0.0925]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([2, 3, 0, 3, 2, 0, 0, 3, 2, 1, 2, 1, 2, 3, 0, 1], device='cuda:0')


 88%|████████▊ | 15/17 [00:09<00:01,  1.62it/s]

tensor([[ 0.1013,  0.0594, -0.0307, -0.0791],
        [ 0.1060,  0.0185, -0.0681, -0.0276],
        [ 0.0160,  0.0267, -0.0677, -0.0623],
        [ 0.0193,  0.0633,  0.0583,  0.0313],
        [ 0.0327, -0.0609, -0.0543,  0.0527],
        [ 0.1523, -0.0434, -0.0122,  0.0088],
        [ 0.1144, -0.0592, -0.0749, -0.0488],
        [ 0.1118, -0.0666, -0.0811, -0.0209],
        [ 0.0989,  0.0369, -0.0856, -0.1307],
        [ 0.1929,  0.0080, -0.1252, -0.0761],
        [ 0.1533, -0.0667, -0.0003,  0.0098],
        [ 0.1579,  0.0112,  0.0103, -0.1753],
        [ 0.0897, -0.0253, -0.0556, -0.0072],
        [ 0.1424, -0.0579, -0.1145, -0.0858],
        [ 0.1304, -0.0234, -0.0362, -0.0205],
        [ 0.1350, -0.0088, -0.0381,  0.0282]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([1, 3, 1, 3, 3, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 3], device='cuda:0')


 94%|█████████▍| 16/17 [00:09<00:00,  1.59it/s]

tensor([[ 0.1870, -0.0331, -0.1013, -0.0763],
        [ 0.0840,  0.0538,  0.0257, -0.0598],
        [ 0.1471, -0.0247, -0.0337, -0.0468],
        [ 0.2131,  0.0009, -0.0327, -0.0722],
        [ 0.1546, -0.0023,  0.0078,  0.0109],
        [ 0.0521,  0.0886, -0.0390, -0.0521],
        [ 0.0686, -0.0136, -0.0728,  0.0103],
        [ 0.1144,  0.0357, -0.1101, -0.0274],
        [ 0.1063,  0.0477, -0.0327, -0.0682],
        [ 0.0823, -0.0041, -0.0277, -0.0015],
        [ 0.0951,  0.0645,  0.0114, -0.0601],
        [ 0.1742,  0.0587, -0.0598, -0.0552],
        [ 0.1329, -0.0691,  0.0048, -0.0429],
        [ 0.0099, -0.0331, -0.0984,  0.0533],
        [ 0.1842, -0.0474, -0.0197, -0.0153],
        [ 0.0600,  0.0016, -0.1051, -0.0153]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([0, 1, 0, 2, 1, 1, 3, 1, 1, 2, 0, 0, 2, 3, 0, 3], device='cuda:0')


100%|██████████| 17/17 [00:10<00:00,  1.63it/s]


Epoch 2/5, Loss: 1.3510, Accuracy: 0.4890


  0%|          | 0/17 [00:00<?, ?it/s]

tensor([[ 0.1260, -0.0894, -0.0989, -0.1411],
        [ 0.0995, -0.0021, -0.0428, -0.0790],
        [ 0.1409,  0.0414, -0.0779, -0.1003],
        [ 0.1731,  0.0129, -0.0897, -0.0247],
        [ 0.1230, -0.0279, -0.0505, -0.0850],
        [ 0.1241,  0.0485,  0.0037, -0.0384],
        [ 0.1614,  0.0252, -0.0807, -0.1119],
        [ 0.2332, -0.0272, -0.0999, -0.0200],
        [ 0.1129,  0.0144, -0.0569,  0.0482],
        [ 0.1797, -0.0019, -0.0672, -0.0399],
        [ 0.0896, -0.0751, -0.1220, -0.0387],
        [ 0.0404,  0.0211, -0.0724, -0.0847],
        [ 0.0997,  0.0613, -0.0387, -0.0422],
        [ 0.1989, -0.0003, -0.0451, -0.0502],
        [ 0.1405, -0.0204, -0.0210, -0.0865],
        [ 0.2490, -0.0264, -0.0994, -0.0829]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([0, 1, 0, 0, 2, 0, 1, 0, 3, 0, 2, 1, 2, 2, 0, 0], device='cuda:0')


  6%|▌         | 1/17 [00:00<00:09,  1.65it/s]

tensor([[ 0.2140, -0.0329, -0.0697, -0.0436],
        [ 0.0927, -0.0343,  0.0035,  0.0336],
        [ 0.1505, -0.0086, -0.0879, -0.0246],
        [ 0.1784,  0.0043, -0.0898,  0.0425],
        [ 0.0407,  0.0096, -0.0269, -0.0420],
        [ 0.1223,  0.0252, -0.0282, -0.0579],
        [ 0.1379, -0.0112, -0.1356,  0.0254],
        [ 0.1815, -0.0517, -0.0610, -0.0148],
        [ 0.1264,  0.0455, -0.0419,  0.0806],
        [ 0.0370, -0.0794, -0.0278, -0.0312],
        [ 0.0599, -0.0110, -0.1047, -0.0549],
        [ 0.0729,  0.0662, -0.0197, -0.0766],
        [ 0.1290,  0.0741, -0.0742,  0.0272],
        [ 0.1521, -0.0328, -0.1029, -0.1716],
        [-0.0052,  0.0134, -0.0454,  0.0474],
        [ 0.1578, -0.0235, -0.0725, -0.0611]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([0, 3, 0, 2, 1, 2, 3, 0, 0, 3, 3, 1, 2, 0, 3, 0], device='cuda:0')


 12%|█▏        | 2/17 [00:01<00:09,  1.63it/s]

tensor([[ 0.1186,  0.0397, -0.0857, -0.0474],
        [ 0.0289,  0.0699, -0.0795,  0.0158],
        [ 0.1400, -0.0314, -0.1160, -0.0171],
        [ 0.0615,  0.1261, -0.0274, -0.0168],
        [ 0.2275, -0.1284, -0.0252, -0.0275],
        [ 0.0169,  0.0670, -0.0729, -0.1429],
        [ 0.0505,  0.0293, -0.0835,  0.0050],
        [ 0.0655, -0.0018, -0.0220, -0.0207],
        [ 0.0277, -0.0633, -0.0600,  0.0172],
        [ 0.0598,  0.0022, -0.1291, -0.0767],
        [ 0.1154, -0.0317, -0.0499, -0.0833],
        [ 0.1155,  0.0664, -0.0827, -0.1066],
        [ 0.0283, -0.0546, -0.0878,  0.0285],
        [ 0.1288, -0.0937, -0.0700, -0.0688],
        [ 0.1429, -0.0643, -0.0891, -0.0438],
        [ 0.1199, -0.0738,  0.0181, -0.0076]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([2, 1, 0, 1, 0, 1, 1, 1, 3, 1, 2, 1, 3, 3, 0, 2], device='cuda:0')


 18%|█▊        | 3/17 [00:01<00:08,  1.64it/s]

tensor([[ 0.0860, -0.0127, -0.0599, -0.0989],
        [ 0.2228,  0.0025, -0.0582, -0.0377],
        [ 0.0581,  0.0448, -0.0489, -0.1069],
        [ 0.0303,  0.0506, -0.1486, -0.0610],
        [ 0.2094, -0.0492,  0.0375, -0.0372],
        [ 0.0945, -0.0012, -0.1226, -0.0803],
        [ 0.0952,  0.0658, -0.0927, -0.1203],
        [ 0.2935, -0.0978, -0.0918, -0.0339],
        [ 0.0986,  0.0267, -0.0272, -0.1207],
        [ 0.1177, -0.0784, -0.0864, -0.0340],
        [ 0.1897, -0.0708, -0.0778, -0.0373],
        [ 0.1271, -0.0061, -0.0379, -0.0929],
        [ 0.0623,  0.0393, -0.0380, -0.0780],
        [ 0.1942, -0.0563, -0.1343, -0.1458],
        [ 0.1205, -0.0537, -0.0612, -0.1233],
        [ 0.0718,  0.0414, -0.0605,  0.0142]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([3, 0, 1, 1, 2, 2, 1, 0, 1, 2, 0, 0, 1, 0, 2, 1], device='cuda:0')


 24%|██▎       | 4/17 [00:02<00:07,  1.63it/s]

tensor([[ 0.1482, -0.0582, -0.0589, -0.0661],
        [ 0.1389,  0.0961, -0.0989, -0.0793],
        [ 0.0860,  0.0484, -0.0559, -0.0681],
        [ 0.2250,  0.0329, -0.1162, -0.1201],
        [ 0.1241,  0.1329, -0.1902, -0.0857],
        [ 0.2571, -0.0183, -0.1475, -0.0628],
        [ 0.1043,  0.0571, -0.0604, -0.1472],
        [ 0.2146,  0.0274, -0.0532, -0.1151],
        [ 0.1775, -0.0893, -0.1184,  0.0052],
        [ 0.1151, -0.0371, -0.0908, -0.0267],
        [ 0.1100,  0.0277, -0.0437, -0.0305],
        [ 0.1680, -0.0913, -0.0870, -0.0985],
        [ 0.1485,  0.0460,  0.0151, -0.0828],
        [ 0.1680, -0.0948, -0.1059, -0.0341],
        [ 0.2780, -0.0106, -0.0356, -0.1012],
        [ 0.0734,  0.0101, -0.0575, -0.0465]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([3, 2, 1, 0, 1, 0, 1, 2, 3, 2, 2, 0, 0, 0, 0, 2], device='cuda:0')


 29%|██▉       | 5/17 [00:03<00:07,  1.63it/s]

tensor([[ 0.3270, -0.0568, -0.1508, -0.0769],
        [ 0.1428, -0.0584, -0.0101, -0.0131],
        [ 0.1857,  0.0208, -0.0971, -0.0826],
        [ 0.1156, -0.0837,  0.0076,  0.0035],
        [ 0.0683, -0.0060, -0.0876,  0.0871],
        [ 0.2848, -0.0130, -0.0648, -0.0060],
        [ 0.2544, -0.0455, -0.0384, -0.0575],
        [ 0.0824, -0.0568, -0.0893,  0.0111],
        [ 0.2731, -0.1192, -0.0878, -0.0959],
        [ 0.0874,  0.0084, -0.0512, -0.0034],
        [ 0.3268, -0.0667, -0.1057, -0.0743],
        [ 0.1329, -0.0316, -0.0823, -0.0638],
        [ 0.0533, -0.0815, -0.0778,  0.0505],
        [ 0.1976, -0.0473, -0.0272, -0.0544],
        [ 0.1038,  0.0657, -0.0534, -0.0597],
        [ 0.1168,  0.0742, -0.0746, -0.0294]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([0, 3, 2, 3, 3, 0, 0, 3, 0, 3, 0, 2, 3, 2, 1, 1], device='cuda:0')


 35%|███▌      | 6/17 [00:03<00:06,  1.63it/s]

tensor([[ 0.2574, -0.0496,  0.0084, -0.0604],
        [ 0.2550, -0.0396, -0.1604,  0.0087],
        [ 0.0467, -0.0783, -0.1102,  0.0481],
        [ 0.2335, -0.0519, -0.2128, -0.0702],
        [ 0.1551,  0.0348, -0.0562, -0.1581],
        [ 0.1602,  0.0364, -0.0416, -0.0497],
        [ 0.2577, -0.0064, -0.1335, -0.1543],
        [ 0.2306, -0.0271, -0.1274, -0.0970],
        [ 0.2388, -0.0343, -0.0469, -0.0964],
        [ 0.2392, -0.0165, -0.0949, -0.1484],
        [ 0.1436,  0.0613, -0.0826, -0.0537],
        [ 0.2059, -0.0512, -0.0412, -0.0430],
        [ 0.0733,  0.0293, -0.0636, -0.0729],
        [ 0.1904, -0.0253, -0.0450, -0.0463],
        [ 0.1446,  0.0597, -0.0991, -0.0756],
        [ 0.0922, -0.0007, -0.0382, -0.0840]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([0, 0, 3, 0, 2, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 2], device='cuda:0')


 41%|████      | 7/17 [00:04<00:06,  1.62it/s]

tensor([[ 0.2222, -0.0527, -0.0202, -0.0593],
        [ 0.1262, -0.1152, -0.0305,  0.0509],
        [ 0.1731,  0.0625, -0.0618, -0.0592],
        [ 0.1585,  0.0380, -0.1430, -0.1470],
        [ 0.2585, -0.0856, -0.0993, -0.1557],
        [ 0.0612,  0.0427, -0.0968, -0.0340],
        [ 0.1853,  0.0278, -0.1331, -0.1295],
        [ 0.1262,  0.1020, -0.1184, -0.0588],
        [ 0.0687,  0.0531, -0.0637, -0.0867],
        [ 0.0821,  0.0857,  0.0869, -0.0514],
        [ 0.0536,  0.0025, -0.0342, -0.0916],
        [ 0.1922,  0.0028, -0.0458, -0.1363],
        [ 0.1898, -0.0201,  0.0055, -0.1289],
        [ 0.2299, -0.1042, -0.1040, -0.1061],
        [ 0.2429, -0.0793, -0.0486, -0.1310],
        [ 0.2515, -0.0720, -0.1072, -0.1089]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([0, 3, 1, 1, 0, 2, 0, 1, 1, 2, 2, 0, 2, 0, 0, 0], device='cuda:0')


 47%|████▋     | 8/17 [00:04<00:05,  1.62it/s]

tensor([[ 0.2407, -0.0358, -0.1721, -0.0217],
        [ 0.2337, -0.0814, -0.0388, -0.0359],
        [ 0.2402, -0.1285, -0.0610, -0.1577],
        [ 0.1616,  0.0711, -0.1209, -0.0402],
        [ 0.2459, -0.0180, -0.1202, -0.0542],
        [ 0.1332, -0.0591, -0.1574, -0.0385],
        [ 0.0871, -0.0062, -0.0502, -0.0694],
        [ 0.1007, -0.0532, -0.0090, -0.0800],
        [ 0.0928,  0.0220, -0.0737, -0.0819],
        [ 0.1268,  0.0025, -0.0360, -0.0732],
        [ 0.1338,  0.1103, -0.0597, -0.1866],
        [ 0.1407, -0.0310, -0.1631,  0.0052],
        [ 0.0449,  0.0506, -0.0938, -0.0942],
        [ 0.2514, -0.0508, -0.0151, -0.0305],
        [ 0.1518,  0.0720,  0.0119, -0.1104],
        [ 0.2340, -0.0491, -0.1267, -0.1366]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([0, 3, 0, 1, 0, 3, 2, 1, 2, 2, 2, 3, 1, 0, 1, 0], device='cuda:0')


 53%|█████▎    | 9/17 [00:05<00:04,  1.61it/s]

tensor([[ 0.3485, -0.0476, -0.0612, -0.0839],
        [ 0.2483, -0.0415, -0.0282, -0.0377],
        [ 0.2852, -0.1080, -0.1311, -0.0630],
        [ 0.2734, -0.0864, -0.0708, -0.0597],
        [ 0.3000, -0.0977, -0.0792, -0.0865],
        [ 0.1610, -0.0365, -0.0521,  0.0711],
        [ 0.0629,  0.0751, -0.0589, -0.0919],
        [ 0.0860, -0.0137, -0.1653, -0.0364],
        [ 0.1057,  0.0205, -0.1465, -0.1019],
        [ 0.2164, -0.0234, -0.0763,  0.0577],
        [ 0.1609, -0.0178, -0.0728, -0.0584],
        [ 0.1681,  0.0069, -0.0410, -0.0394],
        [ 0.2500, -0.0438, -0.1781, -0.0605],
        [ 0.2782, -0.0828, -0.0346, -0.0701],
        [ 0.0311,  0.0506, -0.1032, -0.0957],
        [ 0.2441, -0.0393, -0.1015, -0.0883]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([0, 2, 0, 0, 0, 3, 2, 3, 1, 0, 2, 3, 0, 0, 1, 0], device='cuda:0')


 59%|█████▉    | 10/17 [00:06<00:04,  1.62it/s]

tensor([[ 0.2016, -0.0387, -0.1425, -0.1917],
        [ 0.3136,  0.0517, -0.0981, -0.1752],
        [ 0.2153, -0.0933, -0.0964, -0.1148],
        [ 0.2469,  0.0113, -0.0518, -0.1577],
        [ 0.1205,  0.0245, -0.0818, -0.0972],
        [ 0.1472, -0.0126, -0.0736,  0.0334],
        [ 0.1518,  0.0512, -0.0349, -0.0651],
        [ 0.0842,  0.0016, -0.0863, -0.1389],
        [ 0.1658, -0.0197, -0.1581, -0.0537],
        [ 0.2618, -0.0589, -0.0858, -0.0468],
        [ 0.0756, -0.0614, -0.0412,  0.1309],
        [ 0.2130, -0.1242, -0.0815, -0.0342],
        [ 0.1654,  0.1572, -0.1021, -0.1169],
        [ 0.1330,  0.0348, -0.1269, -0.1875],
        [ 0.1522,  0.1192, -0.0602, -0.0821],
        [ 0.2692, -0.0376, -0.0691, -0.0239]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([0, 0, 0, 2, 2, 3, 2, 1, 0, 3, 3, 3, 1, 1, 1, 0], device='cuda:0')


 65%|██████▍   | 11/17 [00:06<00:03,  1.62it/s]

tensor([[ 0.1422,  0.0940, -0.0512, -0.1128],
        [ 0.0946, -0.0500, -0.1880,  0.0448],
        [ 0.2347, -0.1446, -0.1100, -0.0562],
        [ 0.0641,  0.1877, -0.1089, -0.0444],
        [ 0.3443, -0.0272, -0.1190, -0.0462],
        [ 0.2445, -0.0817, -0.0735, -0.0491],
        [ 0.1739, -0.0313, -0.0349, -0.0776],
        [ 0.1426, -0.0506, -0.0806,  0.0697],
        [ 0.3010, -0.0239, -0.1666, -0.0939],
        [ 0.1370,  0.0690, -0.0473, -0.0924],
        [ 0.1366, -0.0408, -0.0410,  0.0497],
        [ 0.1425,  0.0461, -0.0064,  0.0121],
        [ 0.1458, -0.1488, -0.1310,  0.0244],
        [ 0.2823, -0.0826, -0.1159, -0.0835],
        [ 0.1927, -0.0178, -0.0482,  0.0095],
        [ 0.1192, -0.1732, -0.0488,  0.0430]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([1, 3, 0, 1, 0, 0, 2, 3, 0, 1, 3, 2, 3, 0, 2, 3], device='cuda:0')


 71%|███████   | 12/17 [00:07<00:03,  1.61it/s]

tensor([[ 0.0970,  0.0781, -0.0158, -0.0807],
        [ 0.0995,  0.0353, -0.0953, -0.0967],
        [ 0.0695,  0.0378, -0.1236, -0.0679],
        [ 0.3075, -0.0718, -0.0937, -0.0930],
        [ 0.3178, -0.0920, -0.1271, -0.0930],
        [ 0.3642, -0.1224, -0.1155, -0.0358],
        [ 0.0726,  0.0804, -0.0274, -0.1288],
        [ 0.1342, -0.0116, -0.0172,  0.0227],
        [ 0.2078, -0.1406, -0.1012, -0.0870],
        [ 0.1205,  0.0450, -0.0846, -0.1453],
        [ 0.0828,  0.0696, -0.0651, -0.1296],
        [ 0.2066, -0.1412, -0.0608, -0.0519],
        [ 0.1743, -0.0144, -0.1117, -0.0016],
        [ 0.2823, -0.0716, -0.1582, -0.0948],
        [ 0.1220, -0.0997, -0.1019,  0.0930],
        [ 0.1676, -0.1194, -0.0755,  0.0829]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([1, 1, 0, 0, 0, 0, 1, 3, 0, 2, 2, 3, 3, 0, 3, 3], device='cuda:0')


 76%|███████▋  | 13/17 [00:08<00:02,  1.62it/s]

tensor([[ 0.2127,  0.0311, -0.0507, -0.1196],
        [ 0.1129, -0.0685,  0.0085,  0.0989],
        [ 0.1395, -0.1203, -0.0875,  0.1100],
        [ 0.1670,  0.0288, -0.1074, -0.0702],
        [ 0.1645,  0.0394, -0.0707, -0.0898],
        [ 0.2157, -0.0949, -0.0685, -0.1050],
        [ 0.3460, -0.0812, -0.0733, -0.0948],
        [ 0.2214, -0.1251, -0.1738, -0.0513],
        [ 0.3393, -0.1122, -0.1157, -0.0787],
        [ 0.1460, -0.0257, -0.0707, -0.0748],
        [ 0.1767, -0.0939, -0.1072, -0.0482],
        [ 0.2406,  0.0115, -0.1371, -0.1175],
        [ 0.2722, -0.1101, -0.0654, -0.1316],
        [ 0.1074, -0.0433, -0.1235, -0.0471],
        [ 0.2657, -0.0407, -0.0518, -0.0812],
        [ 0.3313, -0.1114, -0.0640, -0.1108]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([2, 3, 3, 1, 2, 0, 0, 0, 0, 3, 3, 2, 0, 3, 0, 0], device='cuda:0')


 82%|████████▏ | 14/17 [00:08<00:01,  1.61it/s]

tensor([[ 0.1145,  0.0917, -0.1469, -0.1399],
        [ 0.1250,  0.0041, -0.0363, -0.0235],
        [ 0.3968, -0.1071, -0.2126, -0.1369],
        [ 0.0889,  0.0096, -0.0648,  0.0872],
        [ 0.2047, -0.0133, -0.0267, -0.0782],
        [ 0.3216, -0.1061, -0.1854, -0.2294],
        [ 0.1794, -0.1031, -0.1305,  0.0214],
        [ 0.3016, -0.0728, -0.1209, -0.0407],
        [ 0.3764, -0.0886, -0.1906, -0.0869],
        [ 0.1411,  0.0329, -0.0098, -0.1134],
        [ 0.3494, -0.0503, -0.1283, -0.0971],
        [ 0.3235,  0.0267, -0.0963, -0.1138],
        [ 0.1995, -0.2075, -0.0976,  0.0554],
        [ 0.1514, -0.1362, -0.1722,  0.0080],
        [ 0.3058, -0.0989, -0.2286, -0.0541],
        [ 0.3723, -0.0819, -0.1340, -0.0957]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([1, 2, 0, 3, 2, 0, 3, 3, 0, 1, 0, 0, 3, 3, 0, 0], device='cuda:0')


 88%|████████▊ | 15/17 [00:09<00:01,  1.61it/s]

tensor([[ 0.2549, -0.0732, -0.0815, -0.1195],
        [ 0.1840, -0.0547, -0.1194, -0.0510],
        [ 0.1682, -0.0612, -0.1002, -0.0743],
        [ 0.0674,  0.1248, -0.0020, -0.1300],
        [ 0.2112, -0.0495, -0.2159, -0.1108],
        [ 0.2951,  0.0322, -0.1388, -0.0674],
        [ 0.1960, -0.0725, -0.0863,  0.1263],
        [ 0.0763,  0.0981, -0.1254, -0.1218],
        [ 0.0816, -0.1287, -0.1085,  0.0216],
        [ 0.0584,  0.1028, -0.0491, -0.0980],
        [ 0.4060, -0.0894, -0.0644, -0.0665],
        [ 0.1920, -0.0031, -0.0695, -0.0855],
        [ 0.1729, -0.1004, -0.0524,  0.0064],
        [ 0.0843,  0.1075, -0.0522, -0.0847],
        [ 0.1445, -0.1127, -0.1089,  0.1054],
        [ 0.1172,  0.1109, -0.1048, -0.1077]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([0, 3, 2, 1, 0, 0, 3, 1, 3, 1, 0, 0, 3, 1, 3, 1], device='cuda:0')


 94%|█████████▍| 16/17 [00:09<00:00,  1.61it/s]

tensor([[ 0.1516,  0.0722, -0.0464, -0.1349],
        [ 0.3286, -0.0853, -0.1116, -0.1565],
        [ 0.1931, -0.1245, -0.0991, -0.0855],
        [ 0.1863, -0.0110, -0.0415, -0.0946],
        [ 0.3170, -0.1029, -0.1657, -0.1226],
        [-0.0877, -0.0398, -0.1143,  0.1016],
        [ 0.0430, -0.0384, -0.0987,  0.0882],
        [ 0.1671, -0.0941, -0.1162,  0.0283],
        [ 0.1311,  0.0805, -0.0794, -0.1637],
        [ 0.2602, -0.1974, -0.1176, -0.0620],
        [ 0.1889, -0.0102, -0.0615, -0.1232],
        [ 0.2075, -0.0263, -0.0655, -0.0201],
        [ 0.2601, -0.0113, -0.1054, -0.0855],
        [ 0.1442,  0.0984, -0.0549, -0.0848],
        [ 0.0489, -0.0936, -0.1011,  0.0991],
        [ 0.0575,  0.0828, -0.0774, -0.0880]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([1, 0, 2, 1, 0, 3, 3, 3, 1, 3, 0, 2, 2, 1, 3, 2], device='cuda:0')


100%|██████████| 17/17 [00:10<00:00,  1.62it/s]


Epoch 3/5, Loss: 1.2997, Accuracy: 0.4485


  0%|          | 0/17 [00:00<?, ?it/s]

tensor([[ 0.2616, -0.0679, -0.1824, -0.1995],
        [ 0.1127,  0.0474, -0.1129, -0.0377],
        [ 0.2800, -0.1671, -0.1205, -0.1241],
        [ 0.1679, -0.0500, -0.0847, -0.0201],
        [ 0.1382,  0.1189, -0.0253, -0.1698],
        [ 0.3197, -0.0433, -0.0694, -0.1065],
        [ 0.0496,  0.1604, -0.2066, -0.1214],
        [ 0.0850, -0.1038, -0.0735, -0.0082],
        [ 0.2229, -0.1152, -0.2124, -0.1048],
        [ 0.1918, -0.0029, -0.1633, -0.1163],
        [ 0.1025, -0.0289, -0.0174, -0.1214],
        [ 0.3141, -0.0752, -0.0923, -0.1957],
        [ 0.1030,  0.0752, -0.0864, -0.1481],
        [ 0.2276, -0.0028, -0.1583, -0.2140],
        [ 0.1437,  0.1714,  0.0005, -0.0374],
        [ 0.2584,  0.0210, -0.0863, -0.1316]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([0, 2, 0, 0, 2, 0, 1, 3, 3, 1, 2, 0, 1, 0, 1, 2], device='cuda:0')


  6%|▌         | 1/17 [00:00<00:09,  1.62it/s]

tensor([[ 0.3230, -0.0490, -0.1651, -0.0995],
        [ 0.1144, -0.1183, -0.1492,  0.1356],
        [ 0.1768,  0.0722, -0.0466, -0.1243],
        [ 0.3136, -0.1090, -0.1313, -0.0704],
        [ 0.1718,  0.1457, -0.0306, -0.1496],
        [ 0.3522, -0.1018, -0.1059, -0.1752],
        [ 0.3489, -0.1257, -0.2005,  0.0317],
        [ 0.2111, -0.1089, -0.1364, -0.0803],
        [ 0.3480, -0.0190, -0.1171, -0.1522],
        [ 0.1184, -0.1254, -0.1116,  0.0799],
        [ 0.2072, -0.0449, -0.1139, -0.0657],
        [ 0.0596, -0.0072, -0.0532, -0.1188],
        [ 0.3033, -0.0521, -0.1641, -0.1492],
        [ 0.1675, -0.1306, -0.1114,  0.0138],
        [ 0.3474, -0.1782, -0.1971, -0.1303],
        [ 0.2787, -0.0927, -0.1132, -0.0669]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([0, 3, 1, 0, 1, 0, 0, 3, 0, 3, 2, 2, 0, 3, 0, 0], device='cuda:0')


 12%|█▏        | 2/17 [00:01<00:09,  1.61it/s]

tensor([[ 0.4007, -0.0392, -0.1449, -0.1163],
        [ 0.0621,  0.1407, -0.0996, -0.1226],
        [ 0.3594, -0.1780, -0.1462, -0.1040],
        [ 0.0978, -0.0962, -0.1074,  0.0236],
        [ 0.1949, -0.0881, -0.1378,  0.0822],
        [ 0.2576, -0.0529, -0.0811, -0.1213],
        [ 0.2891, -0.1267, -0.1410, -0.1175],
        [ 0.3285, -0.0793, -0.1488, -0.1248],
        [ 0.2034, -0.0563, -0.0621, -0.0342],
        [ 0.3786, -0.0662, -0.1558, -0.0241],
        [ 0.2902, -0.0946, -0.1812, -0.0798],
        [ 0.1143, -0.0585, -0.0837,  0.0235],
        [ 0.0614,  0.1191, -0.0412, -0.1246],
        [ 0.1880, -0.1026, -0.0888,  0.0081],
        [ 0.0133,  0.0834, -0.1277, -0.1245],
        [ 0.1727, -0.0809, -0.1195,  0.0971]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([0, 1, 0, 3, 3, 0, 0, 0, 3, 0, 0, 3, 1, 3, 1, 3], device='cuda:0')


 18%|█▊        | 3/17 [00:01<00:08,  1.61it/s]

tensor([[ 0.1488,  0.0660, -0.1489, -0.0650],
        [ 0.0571,  0.1071, -0.0818, -0.1832],
        [ 0.4089, -0.0907, -0.1846, -0.0960],
        [ 0.0587,  0.1193, -0.1599, -0.0100],
        [ 0.4954, -0.1320, -0.0852, -0.2121],
        [ 0.4011, -0.0926, -0.1151, -0.0747],
        [ 0.1562, -0.0394, -0.0530,  0.1515],
        [ 0.0244,  0.1319, -0.0720, -0.0126],
        [ 0.0270,  0.0010, -0.1962,  0.0410],
        [ 0.1895, -0.0460, -0.0220, -0.0991],
        [ 0.3118, -0.1488, -0.1168, -0.1014],
        [ 0.2266, -0.0029, -0.1681, -0.0974],
        [ 0.0389, -0.0592, -0.1092,  0.0347],
        [ 0.3349, -0.1162, -0.1568, -0.1433],
        [ 0.3068, -0.1408, -0.1421, -0.1885],
        [ 0.4131, -0.0994, -0.1421, -0.1551]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([1, 1, 0, 1, 0, 0, 3, 2, 3, 0, 0, 2, 3, 0, 0, 0], device='cuda:0')


 24%|██▎       | 4/17 [00:02<00:08,  1.61it/s]

tensor([[ 0.0691,  0.0568, -0.0755, -0.1351],
        [ 0.1049,  0.2047, -0.1305, -0.1131],
        [ 0.1159,  0.0744, -0.0951, -0.0937],
        [ 0.0408, -0.0876, -0.1305,  0.0638],
        [ 0.0845,  0.0505, -0.1408, -0.0409],
        [ 0.0775, -0.0314, -0.0905, -0.0925],
        [ 0.1912,  0.0045, -0.0917, -0.0069],
        [ 0.2762, -0.1290, -0.0574,  0.0147],
        [ 0.1706, -0.1632, -0.0877,  0.1023],
        [ 0.2134, -0.1202, -0.1641, -0.0719],
        [ 0.0395,  0.1507, -0.0020, -0.0834],
        [ 0.3303, -0.0675, -0.1490, -0.0070],
        [ 0.1012,  0.1377, -0.1696, -0.0616],
        [ 0.0374,  0.1676, -0.1425, -0.0241],
        [ 0.3565, -0.1413, -0.1313, -0.1402],
        [ 0.3702, -0.1021, -0.1741, -0.1165]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([1, 1, 2, 3, 1, 2, 2, 3, 3, 3, 1, 3, 1, 1, 0, 0], device='cuda:0')


 29%|██▉       | 5/17 [00:03<00:07,  1.61it/s]

tensor([[ 0.3933, -0.1176, -0.1564, -0.0772],
        [ 0.0909, -0.1989, -0.1671,  0.1762],
        [ 0.3535, -0.0595, -0.1651, -0.0828],
        [ 0.1960, -0.0466, -0.0383, -0.0792],
        [ 0.1480,  0.0603,  0.0331, -0.1513],
        [ 0.1147, -0.1483, -0.1514,  0.0956],
        [ 0.3288, -0.1136, -0.1052, -0.0430],
        [ 0.0879,  0.0746, -0.1019, -0.0998],
        [ 0.1552, -0.1844, -0.1951,  0.1314],
        [ 0.0808,  0.0665, -0.0481, -0.1138],
        [ 0.3460, -0.1216, -0.1763, -0.1282],
        [ 0.3830, -0.0790, -0.2548, -0.0676],
        [ 0.1615,  0.1397, -0.1152, -0.1745],
        [ 0.1061, -0.0957, -0.0522,  0.1251],
        [ 0.1014,  0.0598, -0.0603, -0.0781],
        [ 0.2186,  0.0272, -0.0900, -0.0757]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([0, 3, 0, 2, 2, 3, 0, 2, 3, 2, 0, 0, 1, 3, 2, 2], device='cuda:0')


 35%|███▌      | 6/17 [00:03<00:06,  1.60it/s]

tensor([[ 0.4147, -0.1327, -0.1238, -0.0061],
        [ 0.3421, -0.0859, -0.1284, -0.1691],
        [ 0.2340, -0.0464, -0.0632, -0.0834],
        [ 0.2597, -0.0207, -0.0162, -0.1117],
        [ 0.4139, -0.1202, -0.1003, -0.0876],
        [ 0.4358, -0.1750, -0.1985, -0.1176],
        [ 0.2714, -0.0346, -0.0603, -0.0337],
        [ 0.0786, -0.0810, -0.0774,  0.1457],
        [ 0.1950,  0.0300, -0.0895, -0.0875],
        [ 0.2082, -0.0272, -0.1071, -0.1457],
        [ 0.2248,  0.0910, -0.0660, -0.0983],
        [ 0.0638,  0.1401, -0.1596, -0.1108],
        [ 0.0957,  0.1369, -0.0847, -0.1443],
        [ 0.5116, -0.1532, -0.1383, -0.1319],
        [ 0.2043,  0.0584, -0.0445, -0.0617],
        [ 0.2483, -0.2543, -0.1015,  0.0247]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([0, 0, 2, 2, 0, 0, 2, 3, 1, 2, 2, 1, 1, 0, 2, 3], device='cuda:0')


 41%|████      | 7/17 [00:04<00:06,  1.60it/s]

tensor([[ 0.2128, -0.0250, -0.0802, -0.0357],
        [ 0.3973, -0.1639, -0.0807, -0.0542],
        [ 0.2709, -0.0272, -0.0832, -0.0766],
        [ 0.3547, -0.2079, -0.1294, -0.0346],
        [ 0.2247, -0.1890, -0.1496,  0.1043],
        [ 0.1340,  0.1011, -0.0421, -0.1727],
        [ 0.0740,  0.1601, -0.0409, -0.1418],
        [ 0.3511, -0.1378, -0.0956, -0.0453],
        [ 0.1342, -0.1476, -0.1119,  0.0971],
        [ 0.4015, -0.1289, -0.1591, -0.0824],
        [ 0.2165,  0.0455, -0.1423, -0.0695],
        [ 0.1923, -0.0070,  0.0026, -0.0826],
        [ 0.0186,  0.1523, -0.1383, -0.1556],
        [ 0.0940, -0.0780, -0.1203,  0.1861],
        [ 0.0602, -0.0768, -0.2056,  0.0973],
        [ 0.3091, -0.0752, -0.2102, -0.0113]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([2, 0, 2, 0, 3, 2, 1, 3, 3, 0, 2, 2, 1, 3, 3, 3], device='cuda:0')


 47%|████▋     | 8/17 [00:04<00:05,  1.59it/s]

tensor([[ 0.2564,  0.0445, -0.0502, -0.1472],
        [ 0.0792, -0.1254, -0.1576,  0.1874],
        [ 0.0147,  0.1997, -0.1199, -0.1048],
        [ 0.0643, -0.1456, -0.1264,  0.0750],
        [ 0.3566, -0.1325, -0.1590, -0.1996],
        [ 0.1175,  0.0086, -0.1087,  0.0509],
        [ 0.0647,  0.0847, -0.0382, -0.1377],
        [ 0.0693,  0.2197, -0.1170, -0.1894],
        [ 0.3938, -0.1947, -0.2071, -0.0030],
        [ 0.1142,  0.0066, -0.1223, -0.1279],
        [ 0.1418, -0.0063, -0.0307, -0.1149],
        [ 0.1669, -0.0369, -0.1427, -0.1873],
        [ 0.3920, -0.0921, -0.1553, -0.1314],
        [ 0.0065,  0.1771, -0.0787, -0.1495],
        [ 0.0822,  0.0669, -0.1849, -0.1024],
        [ 0.3652, -0.0537, -0.0445, -0.0885]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([2, 3, 1, 3, 0, 2, 1, 1, 0, 1, 2, 2, 0, 1, 1, 0], device='cuda:0')


 53%|█████▎    | 9/17 [00:05<00:05,  1.59it/s]

tensor([[ 0.3633, -0.1240, -0.1768, -0.1323],
        [ 0.4115, -0.2283, -0.1883, -0.1084],
        [ 0.0948,  0.2032, -0.1408, -0.1132],
        [ 0.4570, -0.1781, -0.1351, -0.0068],
        [ 0.1665,  0.0407, -0.0199, -0.1995],
        [ 0.3566, -0.1555, -0.1583, -0.0755],
        [ 0.3676, -0.1843, -0.2931, -0.1322],
        [ 0.1041,  0.1456, -0.1109, -0.1022],
        [ 0.4290, -0.1895, -0.1358, -0.1201],
        [ 0.0109,  0.1352, -0.0972, -0.1250],
        [ 0.0356, -0.0730, -0.1806,  0.0765],
        [ 0.3879, -0.0541, -0.1918, -0.1343],
        [ 0.1193,  0.1010,  0.0115, -0.1106],
        [ 0.2072, -0.0133, -0.0609, -0.1378],
        [ 0.1796,  0.0845, -0.0880, -0.0830],
        [ 0.1479, -0.0034, -0.1814, -0.0843]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([0, 0, 1, 0, 2, 0, 0, 1, 0, 1, 3, 0, 1, 2, 2, 0], device='cuda:0')


 59%|█████▉    | 10/17 [00:06<00:04,  1.59it/s]

tensor([[ 0.3690, -0.1423, -0.1501, -0.1171],
        [ 0.4683, -0.1610, -0.1899, -0.0909],
        [ 0.0627, -0.0910, -0.1190,  0.1387],
        [ 0.0039,  0.1495, -0.1134, -0.0896],
        [ 0.4329, -0.1566, -0.2024, -0.2073],
        [ 0.1733,  0.0123, -0.0559, -0.1658],
        [ 0.0378,  0.1416, -0.1118, -0.0856],
        [ 0.1711, -0.1270, -0.1801,  0.0182],
        [-0.0228,  0.0617, -0.1071, -0.1552],
        [ 0.2530, -0.0950, -0.1316, -0.1042],
        [ 0.0976,  0.1746, -0.1080, -0.0488],
        [ 0.0702,  0.1792, -0.1815, -0.1057],
        [ 0.1809, -0.1646, -0.0704,  0.1119],
        [ 0.2828, -0.2030, -0.1436,  0.1135],
        [ 0.2312, -0.0336, -0.1136, -0.0581],
        [ 0.4912, -0.1489, -0.1791, -0.0894]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([0, 0, 3, 1, 0, 2, 1, 3, 1, 0, 1, 1, 3, 3, 0, 0], device='cuda:0')


 65%|██████▍   | 11/17 [00:06<00:03,  1.59it/s]

tensor([[-0.0077,  0.0078, -0.1433,  0.1466],
        [ 0.0995, -0.0013, -0.0396, -0.1295],
        [ 0.2416, -0.1475, -0.1696,  0.1060],
        [ 0.2523, -0.1589, -0.1299, -0.0462],
        [-0.0432,  0.0150, -0.0222,  0.1024],
        [ 0.1060, -0.0896, -0.1408,  0.1538],
        [ 0.3034, -0.0856, -0.1640, -0.1302],
        [ 0.2126,  0.0209,  0.0252, -0.1220],
        [ 0.4009, -0.1310, -0.1440, -0.0645],
        [ 0.3990, -0.1337, -0.2262, -0.1099],
        [ 0.0421,  0.0021, -0.0239, -0.1737],
        [ 0.1262,  0.1105, -0.0243, -0.1302],
        [ 0.0178,  0.0330, -0.0298, -0.1718],
        [ 0.0439,  0.2114, -0.1199, -0.1615],
        [ 0.0508, -0.0054, -0.0705, -0.0884],
        [ 0.1463,  0.0427, -0.0282, -0.1575]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([3, 1, 3, 0, 3, 3, 0, 2, 0, 0, 2, 2, 1, 1, 2, 2], device='cuda:0')


 71%|███████   | 12/17 [00:07<00:03,  1.58it/s]

tensor([[ 0.2021, -0.0720, -0.1088, -0.1464],
        [ 0.0509,  0.1900, -0.1294, -0.0802],
        [ 0.4144, -0.1460, -0.2469, -0.2007],
        [ 0.0160,  0.1971, -0.1341, -0.0707],
        [ 0.3958, -0.0530, -0.1964, -0.0862],
        [ 0.1847,  0.0332,  0.0033, -0.1808],
        [ 0.3656, -0.1690, -0.1301, -0.0493],
        [ 0.3501, -0.1359, -0.1849, -0.1305],
        [ 0.4202, -0.2116, -0.1574, -0.1073],
        [ 0.1097,  0.1348, -0.1207, -0.1085],
        [ 0.0432, -0.1243, -0.1903,  0.1298],
        [ 0.4180, -0.1587, -0.1665, -0.0607],
        [ 0.4465, -0.2171, -0.1810, -0.2545],
        [ 0.4718, -0.1432, -0.2366, -0.0690],
        [ 0.4149, -0.1170, -0.1775, -0.1815],
        [ 0.2737, -0.0500, -0.0963, -0.0529]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([2, 1, 0, 1, 0, 2, 0, 0, 0, 1, 3, 0, 0, 0, 0, 2], device='cuda:0')


 76%|███████▋  | 13/17 [00:08<00:02,  1.58it/s]

tensor([[ 0.2514, -0.0646, -0.0510, -0.1151],
        [ 0.0464,  0.1436, -0.1175, -0.0939],
        [ 0.1943, -0.2014, -0.1114,  0.1514],
        [ 0.1296, -0.1215, -0.0950,  0.1173],
        [ 0.0688, -0.1362, -0.0914,  0.1769],
        [ 0.1713, -0.1084, -0.1151, -0.1934],
        [ 0.0475,  0.1477, -0.1398, -0.1581],
        [ 0.5387, -0.1891, -0.2553, -0.1995],
        [ 0.1184,  0.0976, -0.0205, -0.1005],
        [ 0.0909,  0.1725, -0.0858, -0.1644],
        [ 0.0029, -0.1227, -0.1944,  0.1877],
        [ 0.0533,  0.2177, -0.0664, -0.0943],
        [ 0.4584, -0.2127, -0.1394, -0.1619],
        [ 0.4805, -0.0507, -0.0952, -0.0993],
        [ 0.2278, -0.1008, -0.1022,  0.1422],
        [ 0.4070, -0.1238, -0.2159, -0.0483]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([2, 1, 3, 3, 3, 2, 1, 0, 2, 1, 3, 2, 0, 0, 3, 0], device='cuda:0')


 82%|████████▏ | 14/17 [00:08<00:01,  1.59it/s]

tensor([[ 0.2219, -0.1985, -0.1136,  0.0907],
        [ 0.5483, -0.1812, -0.0707, -0.1690],
        [ 0.3512, -0.2172, -0.2161, -0.0578],
        [ 0.2467, -0.1570, -0.1493,  0.1204],
        [ 0.0961,  0.1562, -0.1734, -0.1963],
        [ 0.2780, -0.1734, -0.1842,  0.0351],
        [ 0.4233, -0.1321, -0.2359, -0.1565],
        [ 0.4480, -0.2100, -0.2406, -0.1752],
        [ 0.3338, -0.0977, -0.2250, -0.1630],
        [ 0.4766, -0.1442, -0.2412, -0.0994],
        [ 0.1288, -0.0249, -0.0396, -0.1723],
        [ 0.1637, -0.1984, -0.1624,  0.0126],
        [ 0.2505, -0.2054, -0.2208,  0.1051],
        [ 0.2418, -0.2290, -0.1250,  0.1072],
        [ 0.0352, -0.0549, -0.2864,  0.0975],
        [ 0.4348, -0.0978, -0.1346, -0.1143]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([3, 0, 0, 3, 1, 3, 0, 0, 0, 0, 2, 3, 3, 3, 3, 0], device='cuda:0')


 88%|████████▊ | 15/17 [00:09<00:01,  1.59it/s]

tensor([[ 0.4592, -0.2177, -0.1432, -0.1079],
        [ 0.4678, -0.0895, -0.2514, -0.1037],
        [ 0.0088,  0.1701, -0.0450,  0.0058],
        [ 0.0575, -0.0804, -0.1217,  0.1336],
        [ 0.4270, -0.1447, -0.1360, -0.0815],
        [ 0.4533, -0.1351, -0.2786, -0.1460],
        [ 0.0248,  0.2154, -0.0496, -0.1881],
        [ 0.0349,  0.2648, -0.0972, -0.1770],
        [ 0.1146,  0.0439, -0.1080, -0.0602],
        [ 0.2564,  0.0156, -0.0825, -0.1190],
        [ 0.4292, -0.2597, -0.1847, -0.1695],
        [ 0.4799, -0.1419, -0.1082, -0.2210],
        [ 0.0331,  0.1409,  0.0332, -0.0672],
        [ 0.0267, -0.1407, -0.1417,  0.2485],
        [ 0.4123, -0.1863, -0.1516, -0.0573],
        [ 0.0110,  0.2490, -0.1286, -0.1760]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([0, 0, 1, 3, 0, 0, 1, 1, 2, 1, 0, 0, 1, 3, 0, 1], device='cuda:0')


 94%|█████████▍| 16/17 [00:10<00:00,  1.59it/s]

tensor([[ 0.4732, -0.1506, -0.2289, -0.0459],
        [ 0.3902, -0.2482, -0.2215, -0.1352],
        [ 0.2404,  0.0316,  0.0759, -0.1640],
        [ 0.2475, -0.0903, -0.1976, -0.0381],
        [ 0.4025, -0.0938, -0.2151, -0.2015],
        [ 0.0494,  0.1609, -0.1369, -0.1414],
        [ 0.0573,  0.1594, -0.0455, -0.1969],
        [ 0.0723,  0.1640, -0.1340, -0.1310],
        [-0.0327, -0.1033, -0.1038,  0.1970],
        [ 0.5510, -0.1500, -0.2057, -0.1762],
        [ 0.4077, -0.1546, -0.1784, -0.1104],
        [ 0.0783,  0.1586, -0.1172, -0.1426],
        [ 0.4537, -0.1508, -0.2015, -0.0631],
        [ 0.1454, -0.1196, -0.1105,  0.1896],
        [ 0.4937, -0.1409, -0.2247, -0.0605],
        [-0.0313,  0.1188,  0.0153, -0.0924]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([0, 0, 2, 2, 0, 1, 1, 1, 3, 0, 0, 1, 0, 3, 0, 2], device='cuda:0')


100%|██████████| 17/17 [00:10<00:00,  1.59it/s]


Epoch 4/5, Loss: 1.2231, Accuracy: 0.6176


  0%|          | 0/17 [00:00<?, ?it/s]

tensor([[ 0.1991,  0.0700, -0.0044, -0.1248],
        [ 0.1308, -0.1120, -0.2473,  0.1684],
        [ 0.0889,  0.1488, -0.1364, -0.1659],
        [ 0.0726, -0.1888, -0.1943,  0.2061],
        [ 0.0093,  0.1828, -0.0853, -0.1964],
        [ 0.1206, -0.1473, -0.2248,  0.2215],
        [ 0.1781,  0.0173, -0.0224, -0.0355],
        [ 0.5075, -0.1674, -0.2218, -0.1922],
        [ 0.2629, -0.2636, -0.1137,  0.0818],
        [ 0.5141, -0.2437, -0.1654, -0.1892],
        [-0.0499,  0.2438, -0.0421, -0.1684],
        [ 0.2234, -0.1464, -0.1210,  0.1570],
        [ 0.1475, -0.0221, -0.0312, -0.0213],
        [ 0.5459, -0.2277, -0.2617, -0.1162],
        [ 0.5155, -0.2365, -0.2219, -0.1346],
        [ 0.2821, -0.1338, -0.1661,  0.0570]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([2, 3, 1, 3, 1, 3, 2, 0, 3, 0, 1, 3, 2, 0, 0, 3], device='cuda:0')


  6%|▌         | 1/17 [00:00<00:10,  1.58it/s]

tensor([[ 0.1861,  0.1078, -0.0580, -0.1364],
        [ 0.2275, -0.1823, -0.1678,  0.0646],
        [ 0.4493, -0.1121, -0.1743, -0.1419],
        [ 0.4190, -0.1621, -0.2019,  0.0247],
        [ 0.0469,  0.1592, -0.1226, -0.1093],
        [ 0.1015,  0.1289, -0.1173, -0.0809],
        [ 0.0388,  0.2623, -0.1176, -0.1248],
        [ 0.5114, -0.2181, -0.1970, -0.0832],
        [ 0.5200, -0.1849, -0.0950, -0.0839],
        [ 0.0971, -0.0182, -0.0311, -0.1591],
        [ 0.0716, -0.2192, -0.1556,  0.1658],
        [ 0.2097,  0.1049,  0.0416, -0.0955],
        [ 0.2651, -0.1162, -0.0746, -0.0882],
        [ 0.4911, -0.2495, -0.2105, -0.0663],
        [-0.0059,  0.1710, -0.0632, -0.1268],
        [ 0.5210, -0.2972, -0.1899, -0.0693]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([2, 3, 0, 0, 1, 1, 1, 0, 0, 2, 3, 2, 2, 0, 1, 0], device='cuda:0')


 12%|█▏        | 2/17 [00:01<00:09,  1.58it/s]

tensor([[ 0.1962, -0.0045, -0.0495, -0.1434],
        [ 0.0777,  0.1966, -0.0826, -0.1160],
        [ 0.2751, -0.0990, -0.1757, -0.0291],
        [ 0.0475,  0.0871, -0.0582, -0.1616],
        [ 0.0143,  0.2112, -0.0495, -0.1229],
        [ 0.2963, -0.0728, -0.1285, -0.1334],
        [-0.0154,  0.1822, -0.0479, -0.1322],
        [ 0.0889, -0.1948, -0.1233,  0.1782],
        [ 0.5353, -0.2394, -0.1695, -0.2086],
        [ 0.2077, -0.2228, -0.1257,  0.2000],
        [ 0.1300,  0.0815, -0.0011, -0.1239],
        [ 0.1411,  0.0232, -0.0357, -0.1269],
        [ 0.2312, -0.0456, -0.2383, -0.1122],
        [ 0.0607, -0.2389, -0.1630,  0.2034],
        [ 0.0416, -0.1832, -0.1439,  0.1423],
        [ 0.5149, -0.2313, -0.2143,  0.0077]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([2, 1, 3, 2, 1, 0, 1, 3, 0, 3, 2, 2, 1, 3, 3, 0], device='cuda:0')


 18%|█▊        | 3/17 [00:01<00:08,  1.59it/s]

tensor([[-0.0179,  0.2229, -0.0556, -0.1371],
        [ 0.5130, -0.2004, -0.2171, -0.2634],
        [ 0.5171, -0.1865, -0.1989, -0.1012],
        [ 0.0014,  0.2176, -0.1394, -0.1421],
        [ 0.4244, -0.2007, -0.2505, -0.1363],
        [ 0.1511,  0.0062, -0.0239, -0.0675],
        [ 0.5394, -0.1508, -0.1941, -0.1963],
        [ 0.0121,  0.2772, -0.0390, -0.1771],
        [ 0.0232, -0.1830, -0.2124,  0.2354],
        [ 0.5393, -0.2628, -0.1642, -0.1351],
        [ 0.0874,  0.0898, -0.0190, -0.2317],
        [ 0.1119, -0.1543, -0.1878,  0.2615],
        [ 0.5172, -0.1195, -0.1671, -0.1338],
        [ 0.5620, -0.1732, -0.2598, -0.0962],
        [ 0.4507, -0.1879, -0.1259, -0.0466],
        [ 0.4470, -0.1353, -0.1615, -0.1010]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([1, 0, 0, 1, 0, 2, 0, 1, 3, 0, 2, 3, 0, 0, 0, 0], device='cuda:0')


 24%|██▎       | 4/17 [00:02<00:08,  1.59it/s]

tensor([[ 0.2265, -0.2318, -0.1481,  0.1248],
        [ 0.6326, -0.2553, -0.2403, -0.0940],
        [ 0.6057, -0.2089, -0.1980, -0.0853],
        [ 0.3258, -0.2251, -0.1249, -0.0349],
        [ 0.0035,  0.1484, -0.1477, -0.0968],
        [ 0.0583, -0.1743, -0.2089,  0.1368],
        [ 0.4580, -0.2152, -0.1890, -0.0484],
        [ 0.5866, -0.0952, -0.2152, -0.2552],
        [ 0.0309, -0.1982, -0.1846,  0.1991],
        [ 0.4822, -0.1807, -0.2095, -0.0905],
        [ 0.0045,  0.1613, -0.1195, -0.1529],
        [ 0.0051,  0.2012, -0.1286, -0.1736],
        [ 0.0298,  0.2034, -0.0405, -0.1457],
        [ 0.0975,  0.0394,  0.0641, -0.2170],
        [-0.0090,  0.2958, -0.1018, -0.0988],
        [ 0.0485,  0.2790, -0.1110, -0.1336]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([3, 0, 0, 0, 1, 3, 2, 0, 3, 0, 1, 1, 1, 2, 1, 1], device='cuda:0')


 29%|██▉       | 5/17 [00:03<00:07,  1.58it/s]

tensor([[ 0.5141, -0.2813, -0.1857, -0.1734],
        [ 0.0872,  0.0188, -0.0207, -0.1197],
        [-0.0419,  0.1981, -0.0455, -0.2023],
        [ 0.1512, -0.2075, -0.1684,  0.2269],
        [ 0.5699, -0.2297, -0.2818, -0.1055],
        [ 0.0796,  0.0054,  0.0261, -0.1555],
        [ 0.5937, -0.1837, -0.2354, -0.1255],
        [ 0.5441, -0.2073, -0.2468, -0.0988],
        [ 0.4585, -0.2038, -0.1000, -0.0713],
        [-0.0608, -0.1769, -0.1141,  0.2092],
        [-0.0195,  0.1938, -0.1170, -0.2037],
        [ 0.5519, -0.2186, -0.1779, -0.1716],
        [ 0.1017, -0.2529, -0.2225,  0.1648],
        [ 0.6263, -0.1371, -0.0371, -0.1462],
        [ 0.5594, -0.2379, -0.2420, -0.1730],
        [ 0.0736,  0.1654, -0.0422, -0.1918]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([0, 2, 1, 3, 0, 2, 0, 0, 0, 3, 1, 0, 3, 0, 0, 1], device='cuda:0')


 35%|███▌      | 6/17 [00:03<00:06,  1.58it/s]

tensor([[ 0.6240, -0.2883, -0.2071, -0.0977],
        [ 0.4936, -0.1981, -0.1359, -0.2275],
        [ 0.1387,  0.0424, -0.0333, -0.1195],
        [ 0.6765, -0.2580, -0.1607, -0.0554],
        [ 0.3244, -0.1217, -0.0403, -0.1678],
        [ 0.1476, -0.1429, -0.1344,  0.1971],
        [ 0.5744, -0.2071, -0.2394, -0.1541],
        [ 0.1332,  0.0166, -0.1080, -0.0755],
        [ 0.0417,  0.2886, -0.1351, -0.1572],
        [ 0.4981, -0.1853, -0.1823, -0.0519],
        [ 0.2674, -0.1887, -0.2286,  0.1573],
        [-0.0304,  0.1480,  0.0380, -0.1451],
        [ 0.0247,  0.2076, -0.0463, -0.2028],
        [ 0.1613, -0.1473, -0.1142,  0.1499],
        [ 0.0095,  0.2054, -0.0518, -0.2255],
        [ 0.0904, -0.1423, -0.2270,  0.2520]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([0, 0, 0, 0, 2, 3, 0, 2, 1, 0, 3, 2, 1, 3, 1, 3], device='cuda:0')


 41%|████      | 7/17 [00:04<00:06,  1.58it/s]

tensor([[ 0.1178,  0.1104, -0.0903, -0.0309],
        [ 0.0796, -0.1590, -0.1769,  0.1190],
        [ 0.1197,  0.0437, -0.0492, -0.1381],
        [-0.0041,  0.2635, -0.1367, -0.1423],
        [ 0.2618, -0.1912, -0.1653,  0.1523],
        [ 0.5020, -0.1083, -0.1420, -0.1681],
        [ 0.0592,  0.1646, -0.0977, -0.1743],
        [-0.0180, -0.1923, -0.1381,  0.3310],
        [ 0.4810, -0.1717, -0.1659, -0.1209],
        [ 0.1280, -0.1246, -0.0988,  0.1650],
        [ 0.2101, -0.0080, -0.0233, -0.1263],
        [ 0.5175, -0.2372, -0.2238, -0.0987],
        [-0.0182,  0.0795, -0.0103, -0.1791],
        [ 0.0842,  0.0257,  0.0075, -0.0495],
        [ 0.0129, -0.1918, -0.1511,  0.2669],
        [ 0.5692, -0.2326, -0.2461, -0.0448]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([1, 3, 2, 1, 3, 0, 2, 3, 0, 3, 2, 0, 2, 2, 3, 0], device='cuda:0')


 47%|████▋     | 8/17 [00:05<00:05,  1.58it/s]

tensor([[ 5.4721e-02,  1.1702e-01, -8.4357e-03, -1.5682e-01],
        [ 2.0237e-02,  1.2432e-01,  6.5610e-02, -1.4525e-01],
        [-5.7919e-02,  2.2358e-01, -7.4195e-02, -2.3910e-01],
        [ 6.1383e-04,  1.7851e-01, -3.7126e-02, -2.0718e-01],
        [ 2.9075e-01, -1.8934e-01, -8.0165e-02,  2.3698e-02],
        [ 2.7045e-01,  2.5972e-02, -2.7686e-02, -4.7177e-02],
        [ 6.4242e-01, -2.3363e-01, -2.1821e-01, -1.7193e-01],
        [ 1.8979e-01,  5.8682e-02,  1.8350e-02, -2.5104e-01],
        [ 4.0292e-01, -1.2316e-01, -1.9225e-01, -1.1240e-01],
        [ 6.3734e-01, -2.0917e-01, -2.8395e-01, -1.8199e-01],
        [ 5.9445e-01, -2.2131e-01, -2.0765e-01, -8.2697e-02],
        [-4.2596e-02, -2.7483e-02, -1.9200e-01,  1.7705e-01],
        [ 8.8464e-02,  1.5086e-01, -6.9142e-02, -1.5031e-01],
        [-4.8942e-02,  2.7404e-01, -2.4539e-02, -1.9462e-01],
        [ 6.7220e-02,  1.4973e-01, -1.7827e-01, -1.8602e-01],
        [ 1.0120e-01, -2.4855e-01, -1.5455e-01,  2.1987e-01]], device=

 53%|█████▎    | 9/17 [00:05<00:05,  1.58it/s]

tensor([[ 0.0338, -0.1729, -0.1493,  0.3355],
        [ 0.5261, -0.2415, -0.1930, -0.1825],
        [ 0.5324, -0.2256, -0.2707, -0.2300],
        [ 0.1473,  0.0724, -0.0390, -0.0877],
        [ 0.5620, -0.2342, -0.1964, -0.2539],
        [ 0.5562, -0.2158, -0.2029, -0.1949],
        [ 0.6047, -0.2724, -0.2341, -0.1416],
        [ 0.5267, -0.1652, -0.2854, -0.1011],
        [ 0.5828, -0.2681, -0.2483, -0.1215],
        [ 0.2990, -0.0016, -0.0571, -0.1936],
        [ 0.2271,  0.0405, -0.0303, -0.2005],
        [ 0.3683, -0.3015, -0.1537,  0.0820],
        [ 0.6739, -0.3536, -0.2389, -0.0813],
        [ 0.6629, -0.1629, -0.2611, -0.1809],
        [-0.0527,  0.2996, -0.0460, -0.1687],
        [ 0.2390, -0.2199, -0.2062,  0.0696]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([3, 0, 0, 2, 0, 0, 0, 0, 0, 0, 2, 3, 0, 0, 1, 3], device='cuda:0')


 59%|█████▉    | 10/17 [00:06<00:04,  1.58it/s]

tensor([[-0.0690,  0.2659, -0.0901, -0.1576],
        [ 0.4990, -0.1872, -0.2165, -0.2191],
        [ 0.6048, -0.1645, -0.1757, -0.1762],
        [ 0.5507, -0.2507, -0.1123, -0.2938],
        [ 0.1035, -0.2315, -0.2060,  0.2727],
        [ 0.0647,  0.0763, -0.1223, -0.0561],
        [ 0.1741,  0.0998,  0.0078, -0.1826],
        [ 0.5587, -0.3290, -0.1698, -0.1157],
        [ 0.5373, -0.1996, -0.1511, -0.1884],
        [ 0.2303, -0.1790, -0.1498,  0.1745],
        [ 0.7259, -0.1896, -0.2682, -0.2297],
        [ 0.0746,  0.2647, -0.1075, -0.1192],
        [ 0.1016, -0.1522, -0.0774,  0.2111],
        [ 0.7061, -0.3004, -0.2185, -0.2258],
        [ 0.1871,  0.0019, -0.0172, -0.2083],
        [ 0.3450, -0.2157, -0.1695,  0.0561]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([1, 0, 0, 0, 3, 2, 2, 0, 0, 3, 0, 1, 3, 0, 2, 3], device='cuda:0')


 65%|██████▍   | 11/17 [00:06<00:03,  1.57it/s]

tensor([[ 0.2549,  0.0751,  0.0269, -0.1109],
        [ 0.6550, -0.2375, -0.2127, -0.1901],
        [ 0.3149,  0.0515, -0.0774, -0.2462],
        [ 0.4545, -0.2585, -0.2480, -0.2248],
        [ 0.4326, -0.2240, -0.2326,  0.1085],
        [ 0.0108,  0.2255, -0.0193, -0.2017],
        [ 0.5683, -0.2115, -0.1742, -0.1936],
        [ 0.6919, -0.3066, -0.2662, -0.1564],
        [-0.0041,  0.2808, -0.0935, -0.1684],
        [ 0.0717, -0.1112, -0.0511,  0.2838],
        [ 0.7004, -0.2564, -0.0372, -0.1240],
        [-0.0454,  0.2786, -0.0518, -0.1414],
        [ 0.6006, -0.2137, -0.1571, -0.1005],
        [ 0.5892, -0.2883, -0.2041, -0.0438],
        [ 0.1898, -0.0601, -0.1346,  0.0666],
        [-0.1004,  0.2683, -0.0840, -0.1805]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([2, 0, 2, 0, 3, 1, 0, 0, 1, 3, 0, 1, 0, 0, 3, 1], device='cuda:0')


 71%|███████   | 12/17 [00:07<00:03,  1.57it/s]

tensor([[ 0.0149,  0.1049, -0.0297, -0.1929],
        [ 0.0432,  0.2225, -0.1139, -0.1470],
        [-0.0508, -0.2039, -0.1516,  0.2456],
        [ 0.1572, -0.0192,  0.0121, -0.1377],
        [ 0.6147, -0.2074, -0.2356, -0.2270],
        [ 0.0790,  0.0453, -0.0008, -0.1255],
        [-0.0702,  0.2768, -0.1093, -0.2060],
        [ 0.1330, -0.2382, -0.2814,  0.2694],
        [ 0.1726, -0.2981, -0.1504,  0.2356],
        [ 0.5795, -0.1868, -0.1495, -0.2563],
        [-0.0044,  0.2450, -0.1092, -0.1502],
        [-0.0778,  0.2708, -0.1200, -0.1650],
        [-0.0227,  0.2068, -0.0968, -0.1555],
        [ 0.5790, -0.2798, -0.1655, -0.1616],
        [ 0.4948, -0.1029, -0.2350, -0.1890],
        [ 0.6627, -0.2477, -0.2163, -0.1504]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([2, 1, 3, 2, 0, 2, 1, 3, 3, 0, 1, 1, 1, 0, 0, 0], device='cuda:0')


 76%|███████▋  | 13/17 [00:08<00:02,  1.57it/s]

tensor([[ 0.0304,  0.2482, -0.0520, -0.1092],
        [ 0.5620, -0.2789, -0.1278, -0.1390],
        [ 0.0780,  0.1966, -0.1628, -0.1669],
        [-0.0157,  0.2877, -0.0892, -0.1801],
        [ 0.5516, -0.1800, -0.1791, -0.0965],
        [-0.0490,  0.3015, -0.0790, -0.1998],
        [ 0.1973, -0.0102, -0.0210, -0.0895],
        [ 0.6621, -0.2005, -0.1995, -0.2260],
        [ 0.5508, -0.2792, -0.2773, -0.1143],
        [ 0.0107, -0.1824, -0.1765,  0.3409],
        [ 0.4296, -0.2520, -0.2004, -0.0136],
        [-0.0310,  0.3015, -0.0924, -0.1476],
        [ 0.6599, -0.2404, -0.3560, -0.1507],
        [ 0.1291, -0.1740, -0.2916,  0.2077],
        [-0.0014,  0.2711, -0.0650, -0.1286],
        [-0.0751, -0.2504, -0.1805,  0.2910]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([1, 0, 1, 1, 0, 1, 2, 0, 0, 3, 0, 1, 0, 3, 1, 3], device='cuda:0')


 82%|████████▏ | 14/17 [00:08<00:01,  1.56it/s]

tensor([[ 0.0657,  0.0083, -0.1215, -0.0272],
        [-0.0238,  0.2804, -0.0659, -0.1672],
        [ 0.5845, -0.2731, -0.2966, -0.2275],
        [ 0.6430, -0.3161, -0.2601, -0.1420],
        [ 0.7420, -0.2263, -0.2118, -0.2565],
        [ 0.1304,  0.0238, -0.0518, -0.0984],
        [ 0.1311, -0.2030, -0.1593,  0.2771],
        [ 0.0154,  0.1572, -0.0134, -0.1365],
        [ 0.6255, -0.2222, -0.2067, -0.2359],
        [-0.0215,  0.3474, -0.0976, -0.1555],
        [ 0.0440,  0.2641, -0.1017, -0.1310],
        [ 0.1849, -0.0444, -0.0674, -0.1562],
        [ 0.7265, -0.2215, -0.2951, -0.1938],
        [ 0.1213,  0.0717, -0.0200, -0.2030],
        [ 0.5625, -0.2025, -0.2239, -0.0962],
        [ 0.5585, -0.0935, -0.1486, -0.2456]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([2, 1, 0, 0, 0, 2, 3, 2, 0, 1, 1, 1, 0, 2, 0, 0], device='cuda:0')


 88%|████████▊ | 15/17 [00:09<00:01,  1.56it/s]

tensor([[ 0.0607, -0.2963, -0.1809,  0.3116],
        [ 0.1427,  0.0501, -0.2006, -0.0968],
        [ 0.0374,  0.2597, -0.1291, -0.2362],
        [ 0.1449,  0.0474, -0.0578, -0.1951],
        [ 0.1608,  0.1315, -0.1372, -0.2302],
        [ 0.1598, -0.3269, -0.1411,  0.2847],
        [ 0.5818, -0.2614, -0.2151, -0.1394],
        [ 0.0704, -0.1627, -0.2226,  0.2984],
        [ 0.0743,  0.1109, -0.0650, -0.1344],
        [ 0.5746, -0.2621, -0.2530, -0.2556],
        [ 0.1177,  0.0696,  0.0493, -0.2271],
        [ 0.1319, -0.0282, -0.0254, -0.1656],
        [-0.0429,  0.0588,  0.0239, -0.0713],
        [ 0.0553, -0.1780, -0.1887,  0.3193],
        [ 0.6523, -0.2622, -0.0979, -0.2378],
        [ 0.5566, -0.3404, -0.1850, -0.0811]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([3, 1, 1, 2, 1, 3, 0, 3, 2, 0, 2, 2, 2, 3, 0, 0], device='cuda:0')


 94%|█████████▍| 16/17 [00:10<00:00,  1.56it/s]

tensor([[ 0.4912, -0.1805, -0.2642, -0.1257],
        [ 0.0774,  0.1614,  0.0056, -0.1844],
        [-0.0459,  0.2667, -0.1976, -0.1853],
        [ 0.2679, -0.3303, -0.2262,  0.2706],
        [ 0.0027, -0.1971, -0.1950,  0.3897],
        [ 0.3869, -0.0951, -0.1647, -0.2090],
        [ 0.0839,  0.0135, -0.0328, -0.0580],
        [-0.1033, -0.0944, -0.1646,  0.2436],
        [-0.0382,  0.2006,  0.0058, -0.1268],
        [ 0.6542, -0.2217, -0.1825, -0.2173],
        [ 0.1105,  0.0523, -0.0213, -0.1347],
        [ 0.1271, -0.1245, -0.2609,  0.1422],
        [ 0.0310, -0.2059, -0.2279,  0.3160],
        [ 0.5999, -0.2987, -0.2027, -0.1376],
        [-0.0858, -0.2196, -0.2851,  0.3884],
        [ 0.5615, -0.2873, -0.1590, -0.1165]], device='cuda:0',
       grad_fn=<AddmmBackward0>) tensor([0, 2, 1, 3, 3, 0, 2, 3, 2, 0, 2, 3, 3, 0, 3, 0], device='cuda:0')


100%|██████████| 17/17 [00:10<00:00,  1.57it/s]

Epoch 5/5, Loss: 1.1244, Accuracy: 0.7243
Training finished.





In [None]:
# Evaluate the fine-tuned classifier on the held-out loader and print its accuracy.
model.eval()  # put the model in inference mode before scoring

# Running count of correct predictions. It is created as a tensor (rather than
# the int 0) so that `.float()` below stays valid even if the loader yields no batches.
test_correct = torch.zeros((), dtype=torch.long, device=device)
test_total = 0

# Gradients are not needed for evaluation; without no_grad() every forward pass
# would record an autograd graph and keep activations alive for nothing.
with torch.no_grad():
  for input_ids_batch, attention_masks_batch, y_batch in tqdm(test_loader):
    y_batch = y_batch.to(device)
    # Element 0 of the model output is the per-class score tensor.
    y_pred = model(input_ids_batch.to(device), attention_mask=attention_masks_batch.to(device))[0]
    # Predicted class = index of the largest score along the class dimension.
    predicted = y_pred.argmax(dim=1)
    test_correct += (predicted == y_batch).sum()
    test_total += len(y_batch)

print("Accuracy:", test_correct.float() / test_total)

100%|██████████| 4/4 [00:00<00:00,  5.44it/s]

Accuracy: tensor(0.8361, device='cuda:0')





In [None]:
# Save the model: write only the parameter state_dict to the Google Drive path below.
torch.save(
    obj=model.state_dict(),
    f="/content/drive/MyDrive/model.pt",
)