In [55]:
import xlrd
from pathlib import Path
import pandas as pd
import numpy as np
from numba import decorators
import librosa

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import Dataset


In [56]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [57]:
# read in the data file
# Give the location of the file 

df = pd.read_excel(r'data/data.xlsx', sheet_name='reduced totals')
# print(df)

In [66]:
## LOADING IN DATASETS

dataset = Path.cwd().joinpath("SongEmotionDataset")
datasheet = Path.cwd().joinpath("data") # for csua

#emotion labels
label_loc = datasheet.joinpath("data.xlsx")
wb = xlrd.open_workbook(label_loc) 
sheet = wb.sheet_by_index(2)

#emotion arr
emotions = ["amazement", "calmness", "power", "joyful activation", "sadness"]
# emotions = ["amazement", "solemnity", "tenderness", "nostalgia", "calmness", "power", "joyful activation", "tension", "sadness"]


train_song = []
test_song = []
train_emotion = []
test_emotion = []


row_totals = torch.zeros(len(emotions))
for i in range(1, 401):
    row_totals += 


for i in range(1, 401):
    count_total = sheet.cell_value(i, 7)
    
    emotions_counter = [0 for e in emotions]
    if i % 5 == 0:
        test_song.append(dataset.joinpath("{}.mp3".format(i)))
        emotion_arr = []
        for j in range(5):
            emotion_arr.append(sheet.cell_value(i, 2 + j))
        test_emotion.append(torch.tensor(emotion_arr, device=device).float())
        
    emotions_counter = [0 for e in emotions]
    else:
        train_song.append(dataset.joinpath("{}.mp3".format(i)))
        emotion_arr = []
        for j in range(5):
            emotion_arr.append(sheet.cell_value(i, 2 + j))
        train_emotion.append(torch.tensor(emotion_arr, device=device))

print(len(train_song), len(test_song))
print(len(train_emotion), len(test_emotion))

320 80
320 80


In [67]:
# train_emotion

In [68]:
class SongEmotionDataset(Dataset):
    """
    Song Emotion Dataset. Uses librosa to process mp3 files.
    Takes first 20 seconds, and samples every 10 to get processed audio tensor.
    """

    def __init__(self, mp3, labels, transform=None):
        """
        Args:
            mp3: list of paths to mp3 files
            labels: list of labels
        """
        self.labels = labels
        self.mp3 = mp3
        self.cache = {}
        
    def __len__(self):
        return len(self.labels)

    def __getitem__(self, index):
        if index not in self.cache.keys():
#             print("index of " + str(index) + " was cached!")
            data, rate = librosa.load(self.mp3[index], sr=16000, duration=10)
            mfccs = librosa.feature.mfcc(y=data, sr=rate, n_mfcc=40)
            assert rate == 16000
            sample_tensor = torch.tensor(mfccs, device=device).float()
            downsampled_tensor = sample_tensor[::10]
    #         print(mfccs.shape, data.shape)

            self.cache[index] = (downsampled_tensor, F.softmax(self.labels[index]))
#         else:
#             print("index was cached! index of " + str(index))
        return self.cache[index]

In [69]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, (2,2))
        self.conv2 = nn.Conv2d(32, 64, (2,2))
        self.pool1 = nn.MaxPool2d((2,2))
        self.drop1 = nn.Dropout(p=0.25)
        self.flat1 = nn.Flatten()
        self.dense1 = nn.Linear(9920, 128)
        self.drop2 = nn.Dropout(p=0.5)
        self.dense2 = nn.Linear(128, 5)
        
    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        
        x = self.pool1(x)
        x = self.drop1(x)
        x = self.flat1(x)
        x = self.dense1(x)
        x = F.relu(x)
        
        x = self.drop2(x)
        x = self.dense2(x)
        x = F.relu(x)
        
        return F.log_softmax(x, dim = 1)

model = Net()
model.to(device)
print(model)

Net(
  (conv1): Conv2d(1, 32, kernel_size=(2, 2), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(2, 2), stride=(1, 1))
  (pool1): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (drop1): Dropout(p=0.25, inplace=False)
  (flat1): Flatten()
  (dense1): Linear(in_features=9920, out_features=128, bias=True)
  (drop2): Dropout(p=0.5, inplace=False)
  (dense2): Linear(in_features=128, out_features=9, bias=True)
)


In [70]:
train_set = SongEmotionDataset(train_song, train_emotion)
test_set = SongEmotionDataset(test_song, test_emotion)
print("Train set size: " + str(len(train_set)))
print("Test set size: " + str(len(test_set)))

kwargs = {'num_workers': 1, 'pin_memory': True} if device == 'cuda' else {} #needed for using datasets on gpu
train_loader = torch.utils.data.DataLoader(train_set, batch_size = 8, shuffle = True, **kwargs)
test_loader = torch.utils.data.DataLoader(test_set, batch_size = 8, shuffle = True, **kwargs)

optimizer = optim.Adam(model.parameters(), lr = 0.01, weight_decay = 0.0001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size = 20, gamma = 0.1)

Train set size: 320
Test set size: 80


In [71]:
def train(model, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        data.unsqueeze_(1)
        data = data.requires_grad_() #set requires_grad to True for training
        output = model(data)
#         output = output.view(-1, len(emotions))
#         print(output.shape, target.shape)
#         print(output, target)
        loss = F.kl_div(output, target)
#         loss = F.cross_entropy(output, target)
#         loss = nn.CrossEntropyLoss(output, target)
#         loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
#         scheduler.step()
        if batch_idx % log_interval == 0: #print training stats
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss))

In [72]:
def test(model, epoch):
    model.eval()
    correct = 0
    for data, target in test_loader:
        data. unsqueeze_(1)
        output = model(data)
        print(output)
        print(target)
        print("\n")
#         output = output.permute(1, 0, 2)
        pred = output.max(1)[1] # get the index of the max log-probability 
        correct += pred.eq(target.max(1)[1]).cpu().sum().item()
    print('\nTest set: Accuracy: {}/{} ({:.0f}%)\n'.format(
        correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [73]:
import warnings

log_interval = 5
warnings.filterwarnings("ignore")
for epoch in range(1, 100):
    print("training epoch " + str(epoch))
    if epoch == 31:
        print("First round of training complete. Setting learn rate to 0.001.")
#     scheduler.step()
    train(model, epoch)
    scheduler.step()
    test(model, epoch)

training epoch 1
tensor([[-2.3167, -2.3167, -2.3167, -2.1630, -1.9139, -2.3167, -1.9247, -2.3167,
         -2.3167],
        [-2.3167, -2.3167, -2.3167, -2.1630, -1.9139, -2.3167, -1.9247, -2.3167,
         -2.3167],
        [-2.3167, -2.3167, -2.3167, -2.1630, -1.9139, -2.3167, -1.9247, -2.3167,
         -2.3167],
        [-2.3167, -2.3167, -2.3167, -2.1630, -1.9139, -2.3167, -1.9247, -2.3167,
         -2.3167],
        [-2.3167, -2.3167, -2.3167, -2.1630, -1.9139, -2.3167, -1.9247, -2.3167,
         -2.3167],
        [-2.3167, -2.3167, -2.3167, -2.1630, -1.9139, -2.3167, -1.9247, -2.3167,
         -2.3167],
        [-2.3167, -2.3167, -2.3167, -2.1630, -1.9139, -2.3167, -1.9247, -2.3167,
         -2.3167],
        [-2.3167, -2.3167, -2.3167, -2.1630, -1.9139, -2.3167, -1.9247, -2.3167,
         -2.3167]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[1.4487e-02, 5.3295e-03, 1.0705e-01, 7.9097e-01, 3.9380e-02, 7.2127e-04,
         1.9606e-03, 7.2127e-04, 3.9380e-02],
        

tensor([[-2.3167, -2.3167, -2.3167, -2.1630, -1.9139, -2.3167, -1.9247, -2.3167,
         -2.3167],
        [-2.3167, -2.3167, -2.3167, -2.1630, -1.9139, -2.3167, -1.9247, -2.3167,
         -2.3167],
        [-2.3167, -2.3167, -2.3167, -2.1630, -1.9139, -2.3167, -1.9247, -2.3167,
         -2.3167],
        [-2.3167, -2.3167, -2.3167, -2.1630, -1.9139, -2.3167, -1.9247, -2.3167,
         -2.3167],
        [-2.3167, -2.3167, -2.3167, -2.1630, -1.9139, -2.3167, -1.9247, -2.3167,
         -2.3167],
        [-2.3167, -2.3167, -2.3167, -2.1630, -1.9139, -2.3167, -1.9247, -2.3167,
         -2.3167],
        [-2.3167, -2.3167, -2.3167, -2.1630, -1.9139, -2.3167, -1.9247, -2.3167,
         -2.3167],
        [-2.3167, -2.3167, -2.3167, -2.1630, -1.9139, -2.3167, -1.9247, -2.3167,
         -2.3167]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[8.9807e-05, 7.2771e-01, 6.6359e-04, 1.8038e-03, 2.6771e-01, 8.9807e-05,
         3.3038e-05, 8.9807e-05, 1.8038e-03],
        [1.6299e-03, 2.41

tensor([[-2.5170, -2.5170, -2.5170, -2.1990, -1.5619, -2.5170, -1.6340, -2.5170,
         -2.5170],
        [-2.5170, -2.5170, -2.5170, -2.1990, -1.5619, -2.5170, -1.6340, -2.5170,
         -2.5170],
        [-2.5170, -2.5170, -2.5170, -2.1990, -1.5619, -2.5170, -1.6340, -2.5170,
         -2.5170],
        [-2.5170, -2.5170, -2.5170, -2.1990, -1.5619, -2.5170, -1.6340, -2.5170,
         -2.5170],
        [-2.5170, -2.5170, -2.5170, -2.1990, -1.5619, -2.5170, -1.6340, -2.5170,
         -2.5170],
        [-2.5170, -2.5170, -2.5170, -2.1990, -1.5619, -2.5170, -1.6340, -2.5170,
         -2.5170],
        [-2.5170, -2.5170, -2.5170, -2.1990, -1.5619, -2.5170, -1.6340, -2.5170,
         -2.5170],
        [-2.5170, -2.5170, -2.5170, -2.1990, -1.5619, -2.5170, -1.6340, -2.5170,
         -2.5170]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[2.2368e-03, 4.4927e-02, 8.2286e-04, 2.2368e-03, 4.4927e-02, 8.2286e-04,
         8.2286e-04, 9.0238e-01, 8.2286e-04],
        [1.0373e-03, 2.81

tensor([[-2.6867, -2.6867, -2.6867, -2.1580, -1.3499, -2.6867, -1.5300, -2.6867,
         -2.6867],
        [-2.6867, -2.6867, -2.6867, -2.1580, -1.3499, -2.6867, -1.5300, -2.6867,
         -2.6867],
        [-2.6867, -2.6867, -2.6867, -2.1580, -1.3499, -2.6867, -1.5300, -2.6867,
         -2.6867],
        [-2.6867, -2.6867, -2.6867, -2.1580, -1.3499, -2.6867, -1.5300, -2.6867,
         -2.6867],
        [-2.6867, -2.6867, -2.6867, -2.1580, -1.3499, -2.6867, -1.5300, -2.6867,
         -2.6867],
        [-2.6867, -2.6867, -2.6867, -2.1580, -1.3499, -2.6867, -1.5300, -2.6867,
         -2.6867],
        [-2.6867, -2.6867, -2.6867, -2.1580, -1.3499, -2.6867, -1.5300, -2.6867,
         -2.6867],
        [-2.6867, -2.6867, -2.6867, -2.1580, -1.3499, -2.6867, -1.5300, -2.6867,
         -2.6867]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[2.0434e-08, 3.3258e-03, 4.5010e-04, 4.9359e-01, 9.0404e-03, 2.7655e-09,
         2.7655e-09, 5.5546e-08, 4.9359e-01],
        [8.2172e-08, 2.44

tensor([[-2.6849, -2.6849, -2.6849, -2.2028, -1.4217, -2.6849, -1.4319, -2.6849,
         -2.6849],
        [-2.6849, -2.6849, -2.6849, -2.2028, -1.4217, -2.6849, -1.4319, -2.6849,
         -2.6849],
        [-2.6849, -2.6849, -2.6849, -2.2028, -1.4217, -2.6849, -1.4319, -2.6849,
         -2.6849],
        [-2.6849, -2.6849, -2.6849, -2.2028, -1.4217, -2.6849, -1.4319, -2.6849,
         -2.6849],
        [-2.6849, -2.6849, -2.6849, -2.2028, -1.4217, -2.6849, -1.4319, -2.6849,
         -2.6849],
        [-2.6849, -2.6849, -2.6849, -2.2028, -1.4217, -2.6849, -1.4319, -2.6849,
         -2.6849],
        [-2.6849, -2.6849, -2.6849, -2.2028, -1.4217, -2.6849, -1.4319, -2.6849,
         -2.6849],
        [-2.6849, -2.6849, -2.6849, -2.2028, -1.4217, -2.6849, -1.4319, -2.6849,
         -2.6849]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[5.2149e-04, 3.8533e-03, 1.4175e-03, 5.2149e-04, 5.2149e-04, 2.1038e-01,
         5.7188e-01, 2.1038e-01, 5.2149e-04],
        [8.3151e-07, 2.06

tensor([[-2.6660, -2.6660, -2.6660, -2.1313, -1.4137, -2.6660, -1.5101, -2.6660,
         -2.6660],
        [-2.6660, -2.6660, -2.6660, -2.1313, -1.4137, -2.6660, -1.5101, -2.6660,
         -2.6660],
        [-2.6660, -2.6660, -2.6660, -2.1313, -1.4137, -2.6660, -1.5101, -2.6660,
         -2.6660],
        [-2.6660, -2.6660, -2.6660, -2.1313, -1.4137, -2.6660, -1.5101, -2.6660,
         -2.6660],
        [-2.6660, -2.6660, -2.6660, -2.1313, -1.4137, -2.6660, -1.5101, -2.6660,
         -2.6660],
        [-2.6660, -2.6660, -2.6660, -2.1313, -1.4137, -2.6660, -1.5101, -2.6660,
         -2.6660],
        [-2.6660, -2.6660, -2.6660, -2.1313, -1.4137, -2.6660, -1.5101, -2.6660,
         -2.6660],
        [-2.6660, -2.6660, -2.6660, -2.1313, -1.4137, -2.6660, -1.5101, -2.6660,
         -2.6660]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[4.5368e-05, 1.6690e-05, 3.3522e-04, 1.2332e-04, 9.9929e-01, 6.1398e-06,
         1.6690e-05, 4.5368e-05, 1.2332e-04],
        [4.6800e-02, 4.68

tensor([[-2.6564, -2.6564, -2.6564, -2.1297, -1.4336, -2.6564, -1.5075, -2.6564,
         -2.6564],
        [-2.6564, -2.6564, -2.6564, -2.1297, -1.4336, -2.6564, -1.5075, -2.6564,
         -2.6564],
        [-2.6564, -2.6564, -2.6564, -2.1297, -1.4336, -2.6564, -1.5075, -2.6564,
         -2.6564],
        [-2.6564, -2.6564, -2.6564, -2.1297, -1.4336, -2.6564, -1.5075, -2.6564,
         -2.6564],
        [-2.6564, -2.6564, -2.6564, -2.1297, -1.4336, -2.6564, -1.5075, -2.6564,
         -2.6564],
        [-2.6564, -2.6564, -2.6564, -2.1297, -1.4336, -2.6564, -1.5075, -2.6564,
         -2.6564],
        [-2.6564, -2.6564, -2.6564, -2.1297, -1.4336, -2.6564, -1.5075, -2.6564,
         -2.6564],
        [-2.6564, -2.6564, -2.6564, -2.1297, -1.4336, -2.6564, -1.5075, -2.6564,
         -2.6564]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[8.3151e-07, 2.0611e-09, 3.7751e-11, 2.0611e-09, 1.6701e-05, 1.1253e-07,
         9.9998e-01, 2.2603e-06, 3.7751e-11],
        [2.1386e-02, 2.89

tensor([[-2.6497, -2.6497, -2.6497, -2.2138, -1.3936, -2.6497, -1.5210, -2.6497,
         -2.6497],
        [-2.6497, -2.6497, -2.6497, -2.2138, -1.3936, -2.6497, -1.5210, -2.6497,
         -2.6497],
        [-2.6497, -2.6497, -2.6497, -2.2138, -1.3936, -2.6497, -1.5210, -2.6497,
         -2.6497],
        [-2.6497, -2.6497, -2.6497, -2.2138, -1.3936, -2.6497, -1.5210, -2.6497,
         -2.6497],
        [-2.6497, -2.6497, -2.6497, -2.2138, -1.3936, -2.6497, -1.5210, -2.6497,
         -2.6497],
        [-2.6497, -2.6497, -2.6497, -2.2138, -1.3936, -2.6497, -1.5210, -2.6497,
         -2.6497],
        [-2.6497, -2.6497, -2.6497, -2.2138, -1.3936, -2.6497, -1.5210, -2.6497,
         -2.6497],
        [-2.6497, -2.6497, -2.6497, -2.2138, -1.3936, -2.6497, -1.5210, -2.6497,
         -2.6497]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[2.6425e-01, 2.4097e-04, 1.7805e-03, 4.8399e-03, 7.1831e-01, 6.5501e-04,
         4.8399e-03, 4.8399e-03, 2.4097e-04],
        [1.2572e-02, 3.41

tensor([[-2.6675, -2.6675, -2.6675, -2.1620, -1.4438, -2.6471, -1.4658, -2.6675,
         -2.6675],
        [-2.6675, -2.6675, -2.6675, -2.1620, -1.4438, -2.6471, -1.4658, -2.6675,
         -2.6675],
        [-2.6675, -2.6675, -2.6675, -2.1620, -1.4438, -2.6471, -1.4658, -2.6675,
         -2.6675],
        [-2.6675, -2.6675, -2.6675, -2.1620, -1.4438, -2.6471, -1.4658, -2.6675,
         -2.6675],
        [-2.6675, -2.6675, -2.6675, -2.1620, -1.4438, -2.6471, -1.4658, -2.6675,
         -2.6675],
        [-2.6675, -2.6675, -2.6675, -2.1620, -1.4438, -2.6471, -1.4658, -2.6675,
         -2.6675],
        [-2.6675, -2.6675, -2.6675, -2.1620, -1.4438, -2.6471, -1.4658, -2.6675,
         -2.6675],
        [-2.6675, -2.6675, -2.6675, -2.1620, -1.4438, -2.6471, -1.4658, -2.6675,
         -2.6675]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[3.1623e-04, 1.1633e-04, 4.6932e-02, 3.1623e-04, 9.4266e-01, 1.1633e-04,
         2.3366e-03, 6.3516e-03, 8.5959e-04],
        [1.9345e-02, 3.88

tensor([[-2.7045, -2.7045, -2.7045, -2.1702, -1.3692, -2.5694, -1.5121, -2.7045,
         -2.7045],
        [-2.7045, -2.7045, -2.7045, -2.1702, -1.3692, -2.5694, -1.5121, -2.7045,
         -2.7045],
        [-2.7045, -2.7045, -2.7045, -2.1702, -1.3692, -2.5694, -1.5121, -2.7045,
         -2.7045],
        [-2.7045, -2.7045, -2.7045, -2.1702, -1.3692, -2.5694, -1.5121, -2.7045,
         -2.7045],
        [-2.7045, -2.7045, -2.7045, -2.1702, -1.3692, -2.5694, -1.5121, -2.7045,
         -2.7045],
        [-2.7045, -2.7045, -2.7045, -2.1702, -1.3692, -2.5694, -1.5121, -2.7045,
         -2.7045],
        [-2.7045, -2.7045, -2.7045, -2.1702, -1.3692, -2.5694, -1.5121, -2.7045,
         -2.7045],
        [-2.7045, -2.7045, -2.7045, -2.1702, -1.3692, -2.5694, -1.5121, -2.7045,
         -2.7045]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[3.1623e-04, 1.1633e-04, 4.6932e-02, 3.1623e-04, 9.4266e-01, 1.1633e-04,
         2.3366e-03, 6.3516e-03, 8.5959e-04],
        [1.9345e-02, 3.88

tensor([[-2.7168, -2.7168, -2.7168, -2.1587, -1.4076, -2.5546, -1.4771, -2.6674,
         -2.7168],
        [-2.7168, -2.7168, -2.7168, -2.1587, -1.4076, -2.5546, -1.4771, -2.6674,
         -2.7168],
        [-2.7168, -2.7168, -2.7168, -2.1587, -1.4076, -2.5546, -1.4771, -2.6674,
         -2.7168],
        [-2.7168, -2.7168, -2.7168, -2.1587, -1.4076, -2.5546, -1.4771, -2.6674,
         -2.7168],
        [-2.7168, -2.7168, -2.7168, -2.1587, -1.4076, -2.5546, -1.4771, -2.6674,
         -2.7168],
        [-2.7168, -2.7168, -2.7168, -2.1587, -1.4076, -2.5546, -1.4771, -2.6674,
         -2.7168],
        [-2.7168, -2.7168, -2.7168, -2.1587, -1.4076, -2.5546, -1.4771, -2.6674,
         -2.7168],
        [-2.7168, -2.7168, -2.7168, -2.1587, -1.4076, -2.5546, -1.4771, -2.6674,
         -2.7168]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[4.6800e-02, 4.6800e-02, 6.3337e-03, 4.6800e-02, 1.7217e-02, 1.7217e-02,
         3.4581e-01, 3.4581e-01, 1.2722e-01],
        [2.8770e-07, 4.68

tensor([[-2.8034, -2.8034, -2.8034, -2.1768, -1.4412, -2.6959, -1.4157, -2.3297,
         -2.8034],
        [-2.8034, -2.8034, -2.8034, -2.1768, -1.4412, -2.6959, -1.4157, -2.3297,
         -2.8034],
        [-2.8034, -2.8034, -2.8034, -2.1768, -1.4412, -2.6959, -1.4157, -2.3297,
         -2.8034],
        [-2.8034, -2.8034, -2.8034, -2.1768, -1.4412, -2.6959, -1.4157, -2.3297,
         -2.8034],
        [-2.8034, -2.8034, -2.8034, -2.1768, -1.4412, -2.6959, -1.4157, -2.3297,
         -2.8034],
        [-2.8034, -2.8034, -2.8034, -2.1768, -1.4412, -2.6959, -1.4157, -2.3297,
         -2.8034],
        [-2.8034, -2.8034, -2.8034, -2.1768, -1.4412, -2.6959, -1.4157, -2.3297,
         -2.8034],
        [-2.8034, -2.8034, -2.8034, -2.1768, -1.4412, -2.6959, -1.4157, -2.3297,
         -2.8034]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[5.2149e-04, 3.8533e-03, 1.4175e-03, 5.2149e-04, 5.2149e-04, 2.1038e-01,
         5.7188e-01, 2.1038e-01, 5.2149e-04],
        [5.6441e-05, 4.17

tensor([[-2.8550, -2.8550, -2.8550, -2.1817, -1.3867, -2.5620, -1.5025, -2.2319,
         -2.8550],
        [-2.8550, -2.8550, -2.8550, -2.1817, -1.3867, -2.5620, -1.5025, -2.2319,
         -2.8550],
        [-2.8550, -2.8550, -2.8550, -2.1817, -1.3867, -2.5620, -1.5025, -2.2319,
         -2.8550],
        [-2.8550, -2.8550, -2.8550, -2.1817, -1.3867, -2.5620, -1.5025, -2.2319,
         -2.8550],
        [-2.8550, -2.8550, -2.8550, -2.1817, -1.3867, -2.5620, -1.5025, -2.2319,
         -2.8550],
        [-2.8550, -2.8550, -2.8550, -2.1817, -1.3867, -2.5620, -1.5025, -2.2319,
         -2.8550],
        [-2.8550, -2.8550, -2.8550, -2.1817, -1.3867, -2.5620, -1.5025, -2.2319,
         -2.8550],
        [-2.8550, -2.8550, -2.8550, -2.1817, -1.3867, -2.5620, -1.5025, -2.2319,
         -2.8550]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[1.5031e-02, 2.0342e-03, 8.2066e-01, 4.0858e-02, 1.1106e-01, 7.4835e-04,
         5.5296e-03, 2.0342e-03, 2.0342e-03],
        [1.5633e-04, 3.13

tensor([[-2.8734, -2.8734, -2.8734, -2.1126, -1.4012, -2.5670, -1.5208, -2.1944,
         -2.8734],
        [-2.8734, -2.8734, -2.8734, -2.1126, -1.4012, -2.5670, -1.5208, -2.1944,
         -2.8734],
        [-2.8734, -2.8734, -2.8734, -2.1126, -1.4012, -2.5670, -1.5208, -2.1944,
         -2.8734],
        [-2.8734, -2.8734, -2.8734, -2.1126, -1.4012, -2.5670, -1.5208, -2.1944,
         -2.8734],
        [-2.8734, -2.8734, -2.8734, -2.1126, -1.4012, -2.5670, -1.5208, -2.1944,
         -2.8734],
        [-2.8734, -2.8734, -2.8734, -2.1126, -1.4012, -2.5670, -1.5208, -2.1944,
         -2.8734],
        [-2.8734, -2.8734, -2.8734, -2.1126, -1.4012, -2.5670, -1.5208, -2.1944,
         -2.8734],
        [-2.8734, -2.8734, -2.8734, -2.1126, -1.4012, -2.5670, -1.5208, -2.1944,
         -2.8734]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[2.9075e-04, 1.1730e-01, 7.9034e-04, 7.9034e-04, 2.1484e-03, 5.8399e-03,
         2.9075e-04, 8.6671e-01, 5.8399e-03],
        [1.5659e-02, 2.86

tensor([[-2.9299, -2.9299, -2.9299, -2.1536, -1.4687, -2.4755, -1.4409, -2.1263,
         -2.9299],
        [-2.9299, -2.9299, -2.9299, -2.1536, -1.4687, -2.4755, -1.4409, -2.1263,
         -2.9299],
        [-2.9299, -2.9299, -2.9299, -2.1536, -1.4687, -2.4755, -1.4409, -2.1263,
         -2.9299],
        [-2.9299, -2.9299, -2.9299, -2.1536, -1.4687, -2.4755, -1.4409, -2.1263,
         -2.9299],
        [-2.9299, -2.9299, -2.9299, -2.1536, -1.4687, -2.4755, -1.4409, -2.1263,
         -2.9299],
        [-2.9299, -2.9299, -2.9299, -2.1536, -1.4687, -2.4755, -1.4409, -2.1263,
         -2.9299],
        [-2.9299, -2.9299, -2.9299, -2.1536, -1.4687, -2.4755, -1.4409, -2.1263,
         -2.9299],
        [-2.9299, -2.9299, -2.9299, -2.1536, -1.4687, -2.4755, -1.4409, -2.1263,
         -2.9299]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[2.6571e-10, 5.3370e-09, 1.5909e-05, 4.7425e-02, 9.5255e-01, 2.6571e-10,
         1.9634e-09, 3.9435e-08, 5.8527e-06],
        [3.0413e-02, 1.11

tensor([[-2.9104, -2.9104, -2.9104, -2.0873, -1.3783, -2.4921, -1.5699, -2.1599,
         -2.9104],
        [-2.9104, -2.9104, -2.9104, -2.0873, -1.3783, -2.4921, -1.5699, -2.1599,
         -2.9104],
        [-2.9104, -2.9104, -2.9104, -2.0873, -1.3783, -2.4921, -1.5699, -2.1599,
         -2.9104],
        [-2.9104, -2.9104, -2.9104, -2.0873, -1.3783, -2.4921, -1.5699, -2.1599,
         -2.9104],
        [-2.9104, -2.9104, -2.9104, -2.0873, -1.3783, -2.4921, -1.5699, -2.1599,
         -2.9104],
        [-2.9104, -2.9104, -2.9104, -2.0873, -1.3783, -2.4921, -1.5699, -2.1599,
         -2.9104],
        [-2.9104, -2.9104, -2.9104, -2.0873, -1.3783, -2.4921, -1.5699, -2.1599,
         -2.9104],
        [-2.9104, -2.9104, -2.9104, -2.0873, -1.3783, -2.4921, -1.5699, -2.1599,
         -2.9104]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[7.5794e-10, 4.5381e-05, 1.2336e-04, 9.9958e-01, 1.2336e-04, 7.5794e-10,
         2.0603e-09, 3.0578e-07, 1.2336e-04],
        [2.8770e-07, 4.68

tensor([[-2.9390, -2.9390, -2.9390, -2.1689, -1.3895, -2.5276, -1.4745, -2.1534,
         -2.9390],
        [-2.9390, -2.9390, -2.9390, -2.1689, -1.3895, -2.5276, -1.4745, -2.1534,
         -2.9390],
        [-2.9390, -2.9390, -2.9390, -2.1689, -1.3895, -2.5276, -1.4745, -2.1534,
         -2.9390],
        [-2.9390, -2.9390, -2.9390, -2.1689, -1.3895, -2.5276, -1.4745, -2.1534,
         -2.9390],
        [-2.9390, -2.9390, -2.9390, -2.1689, -1.3895, -2.5276, -1.4745, -2.1534,
         -2.9390],
        [-2.9390, -2.9390, -2.9390, -2.1689, -1.3895, -2.5276, -1.4745, -2.1534,
         -2.9390],
        [-2.9390, -2.9390, -2.9390, -2.1689, -1.3895, -2.5276, -1.4745, -2.1534,
         -2.9390],
        [-2.9390, -2.9390, -2.9390, -2.1689, -1.3895, -2.5276, -1.4745, -2.1534,
         -2.9390]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[1.6226e-03, 5.9692e-04, 6.5460e-01, 2.4081e-01, 8.8590e-02, 5.9692e-04,
         5.9692e-04, 5.9692e-04, 1.1989e-02],
        [4.6800e-02, 4.68

tensor([[-2.9782, -2.9782, -2.9782, -2.0692, -1.3631, -2.4512, -1.5383, -2.1769,
         -2.9782],
        [-2.9782, -2.9782, -2.9782, -2.0692, -1.3631, -2.4512, -1.5383, -2.1769,
         -2.9782],
        [-2.9782, -2.9782, -2.9782, -2.0692, -1.3631, -2.4512, -1.5383, -2.1769,
         -2.9782],
        [-2.9782, -2.9782, -2.9782, -2.0692, -1.3631, -2.4512, -1.5383, -2.1769,
         -2.9782],
        [-2.9782, -2.9782, -2.9782, -2.0692, -1.3631, -2.4512, -1.5383, -2.1769,
         -2.9782],
        [-2.9782, -2.9782, -2.9782, -2.0692, -1.3631, -2.4512, -1.5383, -2.1769,
         -2.9782],
        [-2.9782, -2.9782, -2.9782, -2.0692, -1.3631, -2.4512, -1.5383, -2.1769,
         -2.9782],
        [-2.9782, -2.9782, -2.9782, -2.0692, -1.3631, -2.4512, -1.5383, -2.1769,
         -2.9782]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[3.3473e-04, 1.6665e-05, 6.1307e-06, 6.1307e-06, 6.1307e-06, 9.0988e-04,
         9.9780e-01, 9.0988e-04, 6.1307e-06],
        [2.0948e-02, 1.54

tensor([[-2.9578, -2.9578, -2.9578, -2.1134, -1.3654, -2.5292, -1.4773, -2.2239,
         -2.9578],
        [-2.9578, -2.9578, -2.9578, -2.1134, -1.3654, -2.5292, -1.4773, -2.2239,
         -2.9578],
        [-2.9578, -2.9578, -2.9578, -2.1134, -1.3654, -2.5292, -1.4773, -2.2239,
         -2.9578],
        [-2.9578, -2.9578, -2.9578, -2.1134, -1.3654, -2.5292, -1.4773, -2.2239,
         -2.9578],
        [-2.9578, -2.9578, -2.9578, -2.1134, -1.3654, -2.5292, -1.4773, -2.2239,
         -2.9578],
        [-2.9578, -2.9578, -2.9578, -2.1134, -1.3654, -2.5292, -1.4773, -2.2239,
         -2.9578],
        [-2.9578, -2.9578, -2.9578, -2.1134, -1.3654, -2.5292, -1.4773, -2.2239,
         -2.9578],
        [-2.9578, -2.9578, -2.9578, -2.1134, -1.3654, -2.5292, -1.4773, -2.2239,
         -2.9578]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[2.6571e-10, 5.3370e-09, 1.5909e-05, 4.7425e-02, 9.5255e-01, 2.6571e-10,
         1.9634e-09, 3.9435e-08, 5.8527e-06],
        [1.0518e-01, 7.08

tensor([[-2.9968, -2.9968, -2.9968, -2.1134, -1.3224, -2.5201, -1.5038, -2.2057,
         -2.9968],
        [-2.9968, -2.9968, -2.9968, -2.1134, -1.3224, -2.5201, -1.5038, -2.2057,
         -2.9968],
        [-2.9968, -2.9968, -2.9968, -2.1134, -1.3224, -2.5201, -1.5038, -2.2057,
         -2.9968],
        [-2.9968, -2.9968, -2.9968, -2.1134, -1.3224, -2.5201, -1.5038, -2.2057,
         -2.9968],
        [-2.9968, -2.9968, -2.9968, -2.1134, -1.3224, -2.5201, -1.5038, -2.2057,
         -2.9968],
        [-2.9968, -2.9968, -2.9968, -2.1134, -1.3224, -2.5201, -1.5038, -2.2057,
         -2.9968],
        [-2.9968, -2.9968, -2.9968, -2.1134, -1.3224, -2.5201, -1.5038, -2.2057,
         -2.9968],
        [-2.9968, -2.9968, -2.9968, -2.1134, -1.3224, -2.5201, -1.5038, -2.2057,
         -2.9968]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[4.1297e-08, 2.0561e-09, 2.5374e-13, 6.8973e-13, 2.5374e-13, 2.4726e-03,
         9.9753e-01, 2.0561e-09, 2.5374e-13],
        [3.3473e-04, 1.66

tensor([[-2.9523, -2.9523, -2.9523, -2.1296, -1.3506, -2.5139, -1.5505, -2.1198,
         -2.9523],
        [-2.9523, -2.9523, -2.9523, -2.1296, -1.3506, -2.5139, -1.5505, -2.1198,
         -2.9523],
        [-2.9523, -2.9523, -2.9523, -2.1296, -1.3506, -2.5139, -1.5505, -2.1198,
         -2.9523],
        [-2.9523, -2.9523, -2.9523, -2.1296, -1.3506, -2.5139, -1.5505, -2.1198,
         -2.9523],
        [-2.9523, -2.9523, -2.9523, -2.1296, -1.3506, -2.5139, -1.5505, -2.1198,
         -2.9523],
        [-2.9523, -2.9523, -2.9523, -2.1296, -1.3506, -2.5139, -1.5505, -2.1198,
         -2.9523],
        [-2.9523, -2.9523, -2.9523, -2.1296, -1.3506, -2.5139, -1.5505, -2.1198,
         -2.9523],
        [-2.9523, -2.9523, -2.9523, -2.1296, -1.3506, -2.5139, -1.5505, -2.1198,
         -2.9523]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[1.4972e-02, 3.0072e-01, 5.5078e-03, 1.1063e-01, 1.1063e-01, 1.1063e-01,
         4.0698e-02, 3.0072e-01, 5.5078e-03],
        [4.1297e-08, 2.05

tensor([[-2.9483, -2.9483, -2.9483, -2.1365, -1.3662, -2.5143, -1.5360, -2.1120,
         -2.9483],
        [-2.9483, -2.9483, -2.9483, -2.1365, -1.3662, -2.5143, -1.5360, -2.1120,
         -2.9483],
        [-2.9483, -2.9483, -2.9483, -2.1365, -1.3662, -2.5143, -1.5360, -2.1120,
         -2.9483],
        [-2.9483, -2.9483, -2.9483, -2.1365, -1.3662, -2.5143, -1.5360, -2.1120,
         -2.9483],
        [-2.9483, -2.9483, -2.9483, -2.1365, -1.3662, -2.5143, -1.5360, -2.1120,
         -2.9483],
        [-2.9483, -2.9483, -2.9483, -2.1365, -1.3662, -2.5143, -1.5360, -2.1120,
         -2.9483],
        [-2.9483, -2.9483, -2.9483, -2.1365, -1.3662, -2.5143, -1.5360, -2.1120,
         -2.9483],
        [-2.9483, -2.9483, -2.9483, -2.1365, -1.3662, -2.5143, -1.5360, -2.1120,
         -2.9483]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[4.2896e-03, 8.6158e-02, 4.2896e-03, 5.8053e-04, 6.3663e-01, 5.8053e-04,
         1.5780e-03, 2.3420e-01, 3.1696e-02],
        [7.4267e-03, 2.73

tensor([[-2.9531, -2.9531, -2.9531, -2.1353, -1.3731, -2.5092, -1.5196, -2.1235,
         -2.9531],
        [-2.9531, -2.9531, -2.9531, -2.1353, -1.3731, -2.5092, -1.5196, -2.1235,
         -2.9531],
        [-2.9531, -2.9531, -2.9531, -2.1353, -1.3731, -2.5092, -1.5196, -2.1235,
         -2.9531],
        [-2.9531, -2.9531, -2.9531, -2.1353, -1.3731, -2.5092, -1.5196, -2.1235,
         -2.9531],
        [-2.9531, -2.9531, -2.9531, -2.1353, -1.3731, -2.5092, -1.5196, -2.1235,
         -2.9531],
        [-2.9531, -2.9531, -2.9531, -2.1353, -1.3731, -2.5092, -1.5196, -2.1235,
         -2.9531],
        [-2.9531, -2.9531, -2.9531, -2.1353, -1.3731, -2.5092, -1.5196, -2.1235,
         -2.9531],
        [-2.9531, -2.9531, -2.9531, -2.1353, -1.3731, -2.5092, -1.5196, -2.1235,
         -2.9531]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[1.5286e-01, 7.6104e-03, 4.1552e-01, 1.5286e-01, 5.6234e-02, 2.0687e-02,
         1.5286e-01, 2.0687e-02, 2.0687e-02],
        [9.0557e-04, 2.46

tensor([[-2.9587, -2.9587, -2.9587, -2.1290, -1.3648, -2.5212, -1.5160, -2.1362,
         -2.9587],
        [-2.9587, -2.9587, -2.9587, -2.1290, -1.3648, -2.5212, -1.5160, -2.1362,
         -2.9587],
        [-2.9587, -2.9587, -2.9587, -2.1290, -1.3648, -2.5212, -1.5160, -2.1362,
         -2.9587],
        [-2.9587, -2.9587, -2.9587, -2.1290, -1.3648, -2.5212, -1.5160, -2.1362,
         -2.9587],
        [-2.9587, -2.9587, -2.9587, -2.1290, -1.3648, -2.5212, -1.5160, -2.1362,
         -2.9587],
        [-2.9587, -2.9587, -2.9587, -2.1290, -1.3648, -2.5212, -1.5160, -2.1362,
         -2.9587],
        [-2.9587, -2.9587, -2.9587, -2.1290, -1.3648, -2.5212, -1.5160, -2.1362,
         -2.9587],
        [-2.9587, -2.9587, -2.9587, -2.1290, -1.3648, -2.5212, -1.5160, -2.1362,
         -2.9587]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[2.7598e-11, 2.7598e-11, 7.5020e-11, 7.3106e-01, 4.0960e-09, 1.0153e-11,
         1.0153e-11, 1.0153e-11, 2.6894e-01],
        [9.0172e-04, 6.66

tensor([[-2.9585, -2.9585, -2.9585, -2.1297, -1.3740, -2.5166, -1.5086, -2.1328,
         -2.9585],
        [-2.9585, -2.9585, -2.9585, -2.1297, -1.3740, -2.5166, -1.5086, -2.1328,
         -2.9585],
        [-2.9585, -2.9585, -2.9585, -2.1297, -1.3740, -2.5166, -1.5086, -2.1328,
         -2.9585],
        [-2.9585, -2.9585, -2.9585, -2.1297, -1.3740, -2.5166, -1.5086, -2.1328,
         -2.9585],
        [-2.9585, -2.9585, -2.9585, -2.1297, -1.3740, -2.5166, -1.5086, -2.1328,
         -2.9585],
        [-2.9585, -2.9585, -2.9585, -2.1297, -1.3740, -2.5166, -1.5086, -2.1328,
         -2.9585],
        [-2.9585, -2.9585, -2.9585, -2.1297, -1.3740, -2.5166, -1.5086, -2.1328,
         -2.9585],
        [-2.9585, -2.9585, -2.9585, -2.1297, -1.3740, -2.5166, -1.5086, -2.1328,
         -2.9585]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[2.7598e-11, 2.7598e-11, 7.5020e-11, 7.3106e-01, 4.0960e-09, 1.0153e-11,
         1.0153e-11, 1.0153e-11, 2.6894e-01],
        [3.8200e-02, 1.40

tensor([[-2.9571, -2.9571, -2.9571, -2.1166, -1.3863, -2.5160, -1.5070, -2.1259,
         -2.9571],
        [-2.9571, -2.9571, -2.9571, -2.1166, -1.3863, -2.5160, -1.5070, -2.1259,
         -2.9571],
        [-2.9571, -2.9571, -2.9571, -2.1166, -1.3863, -2.5160, -1.5070, -2.1259,
         -2.9571],
        [-2.9571, -2.9571, -2.9571, -2.1166, -1.3863, -2.5160, -1.5070, -2.1259,
         -2.9571],
        [-2.9571, -2.9571, -2.9571, -2.1166, -1.3863, -2.5160, -1.5070, -2.1259,
         -2.9571],
        [-2.9571, -2.9571, -2.9571, -2.1166, -1.3863, -2.5160, -1.5070, -2.1259,
         -2.9571],
        [-2.9571, -2.9571, -2.9571, -2.1166, -1.3863, -2.5160, -1.5070, -2.1259,
         -2.9571],
        [-2.9571, -2.9571, -2.9571, -2.1166, -1.3863, -2.5160, -1.5070, -2.1259,
         -2.9571]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[5.2149e-04, 3.8533e-03, 1.4175e-03, 5.2149e-04, 5.2149e-04, 2.1038e-01,
         5.7188e-01, 2.1038e-01, 5.2149e-04],
        [9.0172e-04, 6.66

tensor([[-2.9617, -2.9617, -2.9617, -2.1224, -1.3809, -2.5220, -1.5008, -2.1309,
         -2.9617],
        [-2.9617, -2.9617, -2.9617, -2.1224, -1.3809, -2.5220, -1.5008, -2.1309,
         -2.9617],
        [-2.9617, -2.9617, -2.9617, -2.1224, -1.3809, -2.5220, -1.5008, -2.1309,
         -2.9617],
        [-2.9617, -2.9617, -2.9617, -2.1224, -1.3809, -2.5220, -1.5008, -2.1309,
         -2.9617],
        [-2.9617, -2.9617, -2.9617, -2.1224, -1.3809, -2.5220, -1.5008, -2.1309,
         -2.9617],
        [-2.9617, -2.9617, -2.9617, -2.1224, -1.3809, -2.5220, -1.5008, -2.1309,
         -2.9617],
        [-2.9617, -2.9617, -2.9617, -2.1224, -1.3809, -2.5220, -1.5008, -2.1309,
         -2.9617],
        [-2.9617, -2.9617, -2.9617, -2.1224, -1.3809, -2.5220, -1.5008, -2.1309,
         -2.9617]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[1.5226e-08, 7.5803e-10, 4.5387e-05, 9.9971e-01, 1.2337e-04, 3.7740e-11,
         1.2337e-04, 7.5803e-10, 2.0606e-09],
        [4.3995e-03, 8.83

tensor([[-2.9586, -2.9586, -2.9586, -2.1264, -1.3863, -2.5143, -1.4998, -2.1279,
         -2.9586],
        [-2.9586, -2.9586, -2.9586, -2.1264, -1.3863, -2.5143, -1.4998, -2.1279,
         -2.9586],
        [-2.9586, -2.9586, -2.9586, -2.1264, -1.3863, -2.5143, -1.4998, -2.1279,
         -2.9586],
        [-2.9586, -2.9586, -2.9586, -2.1264, -1.3863, -2.5143, -1.4998, -2.1279,
         -2.9586],
        [-2.9586, -2.9586, -2.9586, -2.1264, -1.3863, -2.5143, -1.4998, -2.1279,
         -2.9586],
        [-2.9586, -2.9586, -2.9586, -2.1264, -1.3863, -2.5143, -1.4998, -2.1279,
         -2.9586],
        [-2.9586, -2.9586, -2.9586, -2.1264, -1.3863, -2.5143, -1.4998, -2.1279,
         -2.9586],
        [-2.9586, -2.9586, -2.9586, -2.1264, -1.3863, -2.5143, -1.4998, -2.1279,
         -2.9586]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[1.4632e-02, 1.0812e-01, 1.9802e-03, 1.4632e-02, 5.3828e-03, 3.9774e-02,
         1.9802e-03, 7.9887e-01, 1.4632e-02],
        [8.9781e-04, 2.44

tensor([[-2.9625, -2.9625, -2.9625, -2.1283, -1.3811, -2.5199, -1.4948, -2.1360,
         -2.9625],
        [-2.9625, -2.9625, -2.9625, -2.1283, -1.3811, -2.5199, -1.4948, -2.1360,
         -2.9625],
        [-2.9625, -2.9625, -2.9625, -2.1283, -1.3811, -2.5199, -1.4948, -2.1360,
         -2.9625],
        [-2.9625, -2.9625, -2.9625, -2.1283, -1.3811, -2.5199, -1.4948, -2.1360,
         -2.9625],
        [-2.9625, -2.9625, -2.9625, -2.1283, -1.3811, -2.5199, -1.4948, -2.1360,
         -2.9625],
        [-2.9625, -2.9625, -2.9625, -2.1283, -1.3811, -2.5199, -1.4948, -2.1360,
         -2.9625],
        [-2.9625, -2.9625, -2.9625, -2.1283, -1.3811, -2.5199, -1.4948, -2.1360,
         -2.9625],
        [-2.9625, -2.9625, -2.9625, -2.1283, -1.3811, -2.5199, -1.4948, -2.1360,
         -2.9625]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[5.9991e-03, 1.6307e-02, 5.9991e-03, 1.6307e-02, 8.9034e-01, 2.2069e-03,
         2.2069e-03, 1.6307e-02, 4.4327e-02],
        [2.7598e-11, 2.75

tensor([[-2.9603, -2.9603, -2.9603, -2.1281, -1.3822, -2.5141, -1.4964, -2.1385,
         -2.9603],
        [-2.9603, -2.9603, -2.9603, -2.1281, -1.3822, -2.5141, -1.4964, -2.1385,
         -2.9603],
        [-2.9603, -2.9603, -2.9603, -2.1281, -1.3822, -2.5141, -1.4964, -2.1385,
         -2.9603],
        [-2.9603, -2.9603, -2.9603, -2.1281, -1.3822, -2.5141, -1.4964, -2.1385,
         -2.9603],
        [-2.9603, -2.9603, -2.9603, -2.1281, -1.3822, -2.5141, -1.4964, -2.1385,
         -2.9603],
        [-2.9603, -2.9603, -2.9603, -2.1281, -1.3822, -2.5141, -1.4964, -2.1385,
         -2.9603],
        [-2.9603, -2.9603, -2.9603, -2.1281, -1.3822, -2.5141, -1.4964, -2.1385,
         -2.9603],
        [-2.9603, -2.9603, -2.9603, -2.1281, -1.3822, -2.5141, -1.4964, -2.1385,
         -2.9603]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[5.2149e-04, 3.8533e-03, 1.4175e-03, 5.2149e-04, 5.2149e-04, 2.1038e-01,
         5.7188e-01, 2.1038e-01, 5.2149e-04],
        [1.5659e-02, 2.86

tensor([[-2.9613, -2.9613, -2.9613, -2.1312, -1.3847, -2.5111, -1.5005, -2.1228,
         -2.9613],
        [-2.9613, -2.9613, -2.9613, -2.1312, -1.3847, -2.5111, -1.5005, -2.1228,
         -2.9613],
        [-2.9613, -2.9613, -2.9613, -2.1312, -1.3847, -2.5111, -1.5005, -2.1228,
         -2.9613],
        [-2.9613, -2.9613, -2.9613, -2.1312, -1.3847, -2.5111, -1.5005, -2.1228,
         -2.9613],
        [-2.9613, -2.9613, -2.9613, -2.1312, -1.3847, -2.5111, -1.5005, -2.1228,
         -2.9613],
        [-2.9613, -2.9613, -2.9613, -2.1312, -1.3847, -2.5111, -1.5005, -2.1228,
         -2.9613],
        [-2.9613, -2.9613, -2.9613, -2.1312, -1.3847, -2.5111, -1.5005, -2.1228,
         -2.9613],
        [-2.9613, -2.9613, -2.9613, -2.1312, -1.3847, -2.5111, -1.5005, -2.1228,
         -2.9613]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[4.3995e-03, 8.8365e-02, 2.1904e-04, 6.5294e-01, 2.4020e-01, 2.1904e-04,
         8.0579e-05, 1.6185e-03, 1.1959e-02],
        [4.8426e-04, 3.57

tensor([[-2.9700, -2.9700, -2.9700, -2.1320, -1.3752, -2.5211, -1.4929, -2.1344,
         -2.9700],
        [-2.9700, -2.9700, -2.9700, -2.1320, -1.3752, -2.5211, -1.4929, -2.1344,
         -2.9700],
        [-2.9700, -2.9700, -2.9700, -2.1320, -1.3752, -2.5211, -1.4929, -2.1344,
         -2.9700],
        [-2.9700, -2.9700, -2.9700, -2.1320, -1.3752, -2.5211, -1.4929, -2.1344,
         -2.9700],
        [-2.9700, -2.9700, -2.9700, -2.1320, -1.3752, -2.5211, -1.4929, -2.1344,
         -2.9700],
        [-2.9700, -2.9700, -2.9700, -2.1320, -1.3752, -2.5211, -1.4929, -2.1344,
         -2.9700],
        [-2.9700, -2.9700, -2.9700, -2.1320, -1.3752, -2.5211, -1.4929, -2.1344,
         -2.9700],
        [-2.9700, -2.9700, -2.9700, -2.1320, -1.3752, -2.5211, -1.4929, -2.1344,
         -2.9700]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[4.1966e-03, 8.4290e-02, 1.5438e-03, 2.2912e-01, 6.2282e-01, 4.1966e-03,
         1.1407e-02, 1.1407e-02, 3.1009e-02],
        [4.6410e-03, 4.64

tensor([[-2.9739, -2.9739, -2.9739, -2.1232, -1.3764, -2.5182, -1.4889, -2.1437,
         -2.9739],
        [-2.9739, -2.9739, -2.9739, -2.1232, -1.3764, -2.5182, -1.4889, -2.1437,
         -2.9739],
        [-2.9739, -2.9739, -2.9739, -2.1232, -1.3764, -2.5182, -1.4889, -2.1437,
         -2.9739],
        [-2.9739, -2.9739, -2.9739, -2.1232, -1.3764, -2.5182, -1.4889, -2.1437,
         -2.9739],
        [-2.9739, -2.9739, -2.9739, -2.1232, -1.3764, -2.5182, -1.4889, -2.1437,
         -2.9739],
        [-2.9739, -2.9739, -2.9739, -2.1232, -1.3764, -2.5182, -1.4889, -2.1437,
         -2.9739],
        [-2.9739, -2.9739, -2.9739, -2.1232, -1.3764, -2.5182, -1.4889, -2.1437,
         -2.9739],
        [-2.9739, -2.9739, -2.9739, -2.1232, -1.3764, -2.5182, -1.4889, -2.1437,
         -2.9739]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[5.7718e-03, 5.7718e-03, 3.8890e-05, 1.0571e-04, 3.8890e-05, 1.1593e-01,
         8.5661e-01, 1.5689e-02, 3.8890e-05],
        [3.0413e-02, 1.11

tensor([[-2.9752, -2.9752, -2.9752, -2.1274, -1.3769, -2.5157, -1.4934, -2.1292,
         -2.9752],
        [-2.9752, -2.9752, -2.9752, -2.1274, -1.3769, -2.5157, -1.4934, -2.1292,
         -2.9752],
        [-2.9752, -2.9752, -2.9752, -2.1274, -1.3769, -2.5157, -1.4934, -2.1292,
         -2.9752],
        [-2.9752, -2.9752, -2.9752, -2.1274, -1.3769, -2.5157, -1.4934, -2.1292,
         -2.9752],
        [-2.9752, -2.9752, -2.9752, -2.1274, -1.3769, -2.5157, -1.4934, -2.1292,
         -2.9752],
        [-2.9752, -2.9752, -2.9752, -2.1274, -1.3769, -2.5157, -1.4934, -2.1292,
         -2.9752],
        [-2.9752, -2.9752, -2.9752, -2.1274, -1.3769, -2.5157, -1.4934, -2.1292,
         -2.9752],
        [-2.9752, -2.9752, -2.9752, -2.1274, -1.3769, -2.5157, -1.4934, -2.1292,
         -2.9752]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[5.9991e-03, 1.6307e-02, 5.9991e-03, 1.6307e-02, 8.9034e-01, 2.2069e-03,
         2.2069e-03, 1.6307e-02, 4.4327e-02],
        [9.1038e-04, 1.23

tensor([[-2.9754, -2.9754, -2.9754, -2.1336, -1.3813, -2.5270, -1.4843, -2.1230,
         -2.9754],
        [-2.9754, -2.9754, -2.9754, -2.1336, -1.3813, -2.5270, -1.4843, -2.1230,
         -2.9754],
        [-2.9754, -2.9754, -2.9754, -2.1336, -1.3813, -2.5270, -1.4843, -2.1230,
         -2.9754],
        [-2.9754, -2.9754, -2.9754, -2.1336, -1.3813, -2.5270, -1.4843, -2.1230,
         -2.9754],
        [-2.9754, -2.9754, -2.9754, -2.1336, -1.3813, -2.5270, -1.4843, -2.1230,
         -2.9754],
        [-2.9754, -2.9754, -2.9754, -2.1336, -1.3813, -2.5270, -1.4843, -2.1230,
         -2.9754],
        [-2.9754, -2.9754, -2.9754, -2.1336, -1.3813, -2.5270, -1.4843, -2.1230,
         -2.9754],
        [-2.9754, -2.9754, -2.9754, -2.1336, -1.3813, -2.5270, -1.4843, -2.1230,
         -2.9754]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[1.0373e-03, 2.8196e-03, 3.8159e-04, 1.0373e-03, 2.8196e-03, 1.5394e-01,
         4.1846e-01, 4.1846e-01, 1.0373e-03],
        [8.9781e-04, 2.44

tensor([[-2.9805, -2.9805, -2.9805, -2.1348, -1.3850, -2.5149, -1.4777, -2.1261,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1348, -1.3850, -2.5149, -1.4777, -2.1261,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1348, -1.3850, -2.5149, -1.4777, -2.1261,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1348, -1.3850, -2.5149, -1.4777, -2.1261,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1348, -1.3850, -2.5149, -1.4777, -2.1261,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1348, -1.3850, -2.5149, -1.4777, -2.1261,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1348, -1.3850, -2.5149, -1.4777, -2.1261,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1348, -1.3850, -2.5149, -1.4777, -2.1261,
         -2.9805]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[1.8226e-02, 4.9542e-02, 3.6607e-01, 4.9542e-02, 3.6607e-01, 2.4666e-03,
         6.7048e-03, 6.7048e-03, 1.3467e-01],
        [1.7968e-02, 1.21

tensor([[-2.9762, -2.9762, -2.9762, -2.1305, -1.3838, -2.5141, -1.4869, -2.1232,
         -2.9762],
        [-2.9762, -2.9762, -2.9762, -2.1305, -1.3838, -2.5141, -1.4869, -2.1232,
         -2.9762],
        [-2.9762, -2.9762, -2.9762, -2.1305, -1.3838, -2.5141, -1.4869, -2.1232,
         -2.9762],
        [-2.9762, -2.9762, -2.9762, -2.1305, -1.3838, -2.5141, -1.4869, -2.1232,
         -2.9762],
        [-2.9762, -2.9762, -2.9762, -2.1305, -1.3838, -2.5141, -1.4869, -2.1232,
         -2.9762],
        [-2.9762, -2.9762, -2.9762, -2.1305, -1.3838, -2.5141, -1.4869, -2.1232,
         -2.9762],
        [-2.9762, -2.9762, -2.9762, -2.1305, -1.3838, -2.5141, -1.4869, -2.1232,
         -2.9762],
        [-2.9762, -2.9762, -2.9762, -2.1305, -1.3838, -2.5141, -1.4869, -2.1232,
         -2.9762]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[1.4972e-02, 3.0072e-01, 5.5078e-03, 1.1063e-01, 1.1063e-01, 1.1063e-01,
         4.0698e-02, 3.0072e-01, 5.5078e-03],
        [8.9781e-04, 2.44

tensor([[-2.9817, -2.9817, -2.9817, -2.1357, -1.3829, -2.5157, -1.4779, -2.1266,
         -2.9817],
        [-2.9817, -2.9817, -2.9817, -2.1357, -1.3829, -2.5157, -1.4779, -2.1266,
         -2.9817],
        [-2.9817, -2.9817, -2.9817, -2.1357, -1.3829, -2.5157, -1.4779, -2.1266,
         -2.9817],
        [-2.9817, -2.9817, -2.9817, -2.1357, -1.3829, -2.5157, -1.4779, -2.1266,
         -2.9817],
        [-2.9817, -2.9817, -2.9817, -2.1357, -1.3829, -2.5157, -1.4779, -2.1266,
         -2.9817],
        [-2.9817, -2.9817, -2.9817, -2.1357, -1.3829, -2.5157, -1.4779, -2.1266,
         -2.9817],
        [-2.9817, -2.9817, -2.9817, -2.1357, -1.3829, -2.5157, -1.4779, -2.1266,
         -2.9817],
        [-2.9817, -2.9817, -2.9817, -2.1357, -1.3829, -2.5157, -1.4779, -2.1266,
         -2.9817]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[2.2200e-03, 2.2200e-03, 4.4590e-02, 8.9561e-01, 4.4590e-02, 2.2200e-03,
         2.2200e-03, 3.0044e-04, 6.0345e-03],
        [9.0557e-04, 2.46

tensor([[-2.9770, -2.9770, -2.9770, -2.1277, -1.3879, -2.5104, -1.4861, -2.1200,
         -2.9770],
        [-2.9770, -2.9770, -2.9770, -2.1277, -1.3879, -2.5104, -1.4861, -2.1200,
         -2.9770],
        [-2.9770, -2.9770, -2.9770, -2.1277, -1.3879, -2.5104, -1.4861, -2.1200,
         -2.9770],
        [-2.9770, -2.9770, -2.9770, -2.1277, -1.3879, -2.5104, -1.4861, -2.1200,
         -2.9770],
        [-2.9770, -2.9770, -2.9770, -2.1277, -1.3879, -2.5104, -1.4861, -2.1200,
         -2.9770],
        [-2.9770, -2.9770, -2.9770, -2.1277, -1.3879, -2.5104, -1.4861, -2.1200,
         -2.9770],
        [-2.9770, -2.9770, -2.9770, -2.1277, -1.3879, -2.5104, -1.4861, -2.1200,
         -2.9770],
        [-2.9770, -2.9770, -2.9770, -2.1277, -1.3879, -2.5104, -1.4861, -2.1200,
         -2.9770]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[5.6441e-05, 4.1705e-04, 6.1895e-02, 4.5735e-01, 4.5735e-01, 5.6441e-05,
         5.6441e-05, 5.6441e-05, 2.2770e-02],
        [1.5226e-08, 7.58

tensor([[-2.9799, -2.9799, -2.9799, -2.1272, -1.3855, -2.5166, -1.4829, -2.1226,
         -2.9799],
        [-2.9799, -2.9799, -2.9799, -2.1272, -1.3855, -2.5166, -1.4829, -2.1226,
         -2.9799],
        [-2.9799, -2.9799, -2.9799, -2.1272, -1.3855, -2.5166, -1.4829, -2.1226,
         -2.9799],
        [-2.9799, -2.9799, -2.9799, -2.1272, -1.3855, -2.5166, -1.4829, -2.1226,
         -2.9799],
        [-2.9799, -2.9799, -2.9799, -2.1272, -1.3855, -2.5166, -1.4829, -2.1226,
         -2.9799],
        [-2.9799, -2.9799, -2.9799, -2.1272, -1.3855, -2.5166, -1.4829, -2.1226,
         -2.9799],
        [-2.9799, -2.9799, -2.9799, -2.1272, -1.3855, -2.5166, -1.4829, -2.1226,
         -2.9799],
        [-2.9799, -2.9799, -2.9799, -2.1272, -1.3855, -2.5166, -1.4829, -2.1226,
         -2.9799]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[3.1623e-04, 1.1633e-04, 4.6932e-02, 3.1623e-04, 9.4266e-01, 1.1633e-04,
         2.3366e-03, 6.3516e-03, 8.5959e-04],
        [6.1423e-06, 9.99

tensor([[-2.9770, -2.9770, -2.9770, -2.1327, -1.3786, -2.5093, -1.4967, -2.1154,
         -2.9770],
        [-2.9770, -2.9770, -2.9770, -2.1327, -1.3786, -2.5093, -1.4967, -2.1154,
         -2.9770],
        [-2.9770, -2.9770, -2.9770, -2.1327, -1.3786, -2.5093, -1.4967, -2.1154,
         -2.9770],
        [-2.9770, -2.9770, -2.9770, -2.1327, -1.3786, -2.5093, -1.4967, -2.1154,
         -2.9770],
        [-2.9770, -2.9770, -2.9770, -2.1327, -1.3786, -2.5093, -1.4967, -2.1154,
         -2.9770],
        [-2.9770, -2.9770, -2.9770, -2.1327, -1.3786, -2.5093, -1.4967, -2.1154,
         -2.9770],
        [-2.9770, -2.9770, -2.9770, -2.1327, -1.3786, -2.5093, -1.4967, -2.1154,
         -2.9770],
        [-2.9770, -2.9770, -2.9770, -2.1327, -1.3786, -2.5093, -1.4967, -2.1154,
         -2.9770]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[9.1038e-04, 1.2321e-04, 3.3491e-04, 1.2321e-04, 4.5325e-05, 4.5325e-05,
         9.9836e-01, 1.6674e-05, 4.5325e-05],
        [7.5569e-10, 1.12

tensor([[-2.9772, -2.9772, -2.9772, -2.1323, -1.3786, -2.5098, -1.4961, -2.1164,
         -2.9772],
        [-2.9772, -2.9772, -2.9772, -2.1323, -1.3786, -2.5098, -1.4961, -2.1164,
         -2.9772],
        [-2.9772, -2.9772, -2.9772, -2.1323, -1.3786, -2.5098, -1.4961, -2.1164,
         -2.9772],
        [-2.9772, -2.9772, -2.9772, -2.1323, -1.3786, -2.5098, -1.4961, -2.1164,
         -2.9772],
        [-2.9772, -2.9772, -2.9772, -2.1323, -1.3786, -2.5098, -1.4961, -2.1164,
         -2.9772],
        [-2.9772, -2.9772, -2.9772, -2.1323, -1.3786, -2.5098, -1.4961, -2.1164,
         -2.9772],
        [-2.9772, -2.9772, -2.9772, -2.1323, -1.3786, -2.5098, -1.4961, -2.1164,
         -2.9772],
        [-2.9772, -2.9772, -2.9772, -2.1323, -1.3786, -2.5098, -1.4961, -2.1164,
         -2.9772]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[9.0172e-04, 6.6628e-03, 4.4894e-05, 9.0172e-04, 2.4511e-03, 1.2203e-04,
         9.8885e-01, 4.4894e-05, 1.6516e-05],
        [8.9781e-04, 2.44

tensor([[-2.9778, -2.9778, -2.9778, -2.1322, -1.3783, -2.5099, -1.4956, -2.1168,
         -2.9778],
        [-2.9778, -2.9778, -2.9778, -2.1322, -1.3783, -2.5099, -1.4956, -2.1168,
         -2.9778],
        [-2.9778, -2.9778, -2.9778, -2.1322, -1.3783, -2.5099, -1.4956, -2.1168,
         -2.9778],
        [-2.9778, -2.9778, -2.9778, -2.1322, -1.3783, -2.5099, -1.4956, -2.1168,
         -2.9778],
        [-2.9778, -2.9778, -2.9778, -2.1322, -1.3783, -2.5099, -1.4956, -2.1168,
         -2.9778],
        [-2.9778, -2.9778, -2.9778, -2.1322, -1.3783, -2.5099, -1.4956, -2.1168,
         -2.9778],
        [-2.9778, -2.9778, -2.9778, -2.1322, -1.3783, -2.5099, -1.4956, -2.1168,
         -2.9778],
        [-2.9778, -2.9778, -2.9778, -2.1322, -1.3783, -2.5099, -1.4956, -2.1168,
         -2.9778]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[1.4972e-02, 3.0072e-01, 5.5078e-03, 1.1063e-01, 1.1063e-01, 1.1063e-01,
         4.0698e-02, 3.0072e-01, 5.5078e-03],
        [5.7718e-03, 5.77

tensor([[-2.9777, -2.9777, -2.9777, -2.1308, -1.3783, -2.5105, -1.4961, -2.1171,
         -2.9777],
        [-2.9777, -2.9777, -2.9777, -2.1308, -1.3783, -2.5105, -1.4961, -2.1171,
         -2.9777],
        [-2.9777, -2.9777, -2.9777, -2.1308, -1.3783, -2.5105, -1.4961, -2.1171,
         -2.9777],
        [-2.9777, -2.9777, -2.9777, -2.1308, -1.3783, -2.5105, -1.4961, -2.1171,
         -2.9777],
        [-2.9777, -2.9777, -2.9777, -2.1308, -1.3783, -2.5105, -1.4961, -2.1171,
         -2.9777],
        [-2.9777, -2.9777, -2.9777, -2.1308, -1.3783, -2.5105, -1.4961, -2.1171,
         -2.9777],
        [-2.9777, -2.9777, -2.9777, -2.1308, -1.3783, -2.5105, -1.4961, -2.1171,
         -2.9777],
        [-2.9777, -2.9777, -2.9777, -2.1308, -1.3783, -2.5105, -1.4961, -2.1171,
         -2.9777]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[4.2270e-03, 4.2270e-03, 1.5550e-03, 4.2270e-03, 1.1490e-02, 2.3079e-01,
         6.2735e-01, 8.4902e-02, 3.1234e-02],
        [2.7393e-10, 5.01

tensor([[-2.9778, -2.9778, -2.9778, -2.1317, -1.3782, -2.5104, -1.4957, -2.1171,
         -2.9778],
        [-2.9778, -2.9778, -2.9778, -2.1317, -1.3782, -2.5104, -1.4957, -2.1171,
         -2.9778],
        [-2.9778, -2.9778, -2.9778, -2.1317, -1.3782, -2.5104, -1.4957, -2.1171,
         -2.9778],
        [-2.9778, -2.9778, -2.9778, -2.1317, -1.3782, -2.5104, -1.4957, -2.1171,
         -2.9778],
        [-2.9778, -2.9778, -2.9778, -2.1317, -1.3782, -2.5104, -1.4957, -2.1171,
         -2.9778],
        [-2.9778, -2.9778, -2.9778, -2.1317, -1.3782, -2.5104, -1.4957, -2.1171,
         -2.9778],
        [-2.9778, -2.9778, -2.9778, -2.1317, -1.3782, -2.5104, -1.4957, -2.1171,
         -2.9778],
        [-2.9778, -2.9778, -2.9778, -2.1317, -1.3782, -2.5104, -1.4957, -2.1171,
         -2.9778]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[1.4632e-02, 1.0812e-01, 1.9802e-03, 1.4632e-02, 5.3828e-03, 3.9774e-02,
         1.9802e-03, 7.9887e-01, 1.4632e-02],
        [4.6410e-03, 4.64

tensor([[-2.9784, -2.9784, -2.9784, -2.1320, -1.3783, -2.5100, -1.4946, -2.1178,
         -2.9784],
        [-2.9784, -2.9784, -2.9784, -2.1320, -1.3783, -2.5100, -1.4946, -2.1178,
         -2.9784],
        [-2.9784, -2.9784, -2.9784, -2.1320, -1.3783, -2.5100, -1.4946, -2.1178,
         -2.9784],
        [-2.9784, -2.9784, -2.9784, -2.1320, -1.3783, -2.5100, -1.4946, -2.1178,
         -2.9784],
        [-2.9784, -2.9784, -2.9784, -2.1320, -1.3783, -2.5100, -1.4946, -2.1178,
         -2.9784],
        [-2.9784, -2.9784, -2.9784, -2.1320, -1.3783, -2.5100, -1.4946, -2.1178,
         -2.9784],
        [-2.9784, -2.9784, -2.9784, -2.1320, -1.3783, -2.5100, -1.4946, -2.1178,
         -2.9784],
        [-2.9784, -2.9784, -2.9784, -2.1320, -1.3783, -2.5100, -1.4946, -2.1178,
         -2.9784]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[1.4972e-02, 3.0072e-01, 5.5078e-03, 1.1063e-01, 1.1063e-01, 1.1063e-01,
         4.0698e-02, 3.0072e-01, 5.5078e-03],
        [1.5633e-04, 3.13

tensor([[-2.9785, -2.9785, -2.9785, -2.1314, -1.3784, -2.5107, -1.4944, -2.1181,
         -2.9785],
        [-2.9785, -2.9785, -2.9785, -2.1314, -1.3784, -2.5107, -1.4944, -2.1181,
         -2.9785],
        [-2.9785, -2.9785, -2.9785, -2.1314, -1.3784, -2.5107, -1.4944, -2.1181,
         -2.9785],
        [-2.9785, -2.9785, -2.9785, -2.1314, -1.3784, -2.5107, -1.4944, -2.1181,
         -2.9785],
        [-2.9785, -2.9785, -2.9785, -2.1314, -1.3784, -2.5107, -1.4944, -2.1181,
         -2.9785],
        [-2.9785, -2.9785, -2.9785, -2.1314, -1.3784, -2.5107, -1.4944, -2.1181,
         -2.9785],
        [-2.9785, -2.9785, -2.9785, -2.1314, -1.3784, -2.5107, -1.4944, -2.1181,
         -2.9785],
        [-2.9785, -2.9785, -2.9785, -2.1314, -1.3784, -2.5107, -1.4944, -2.1181,
         -2.9785]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[1.5633e-04, 3.1399e-03, 1.1551e-03, 6.3067e-02, 4.6601e-01, 1.5633e-04,
         1.5633e-04, 1.5633e-04, 4.6601e-01],
        [2.6571e-10, 5.33

tensor([[-2.9784, -2.9784, -2.9784, -2.1311, -1.3789, -2.5102, -1.4949, -2.1167,
         -2.9784],
        [-2.9784, -2.9784, -2.9784, -2.1311, -1.3789, -2.5102, -1.4949, -2.1167,
         -2.9784],
        [-2.9784, -2.9784, -2.9784, -2.1311, -1.3789, -2.5102, -1.4949, -2.1167,
         -2.9784],
        [-2.9784, -2.9784, -2.9784, -2.1311, -1.3789, -2.5102, -1.4949, -2.1167,
         -2.9784],
        [-2.9784, -2.9784, -2.9784, -2.1311, -1.3789, -2.5102, -1.4949, -2.1167,
         -2.9784],
        [-2.9784, -2.9784, -2.9784, -2.1311, -1.3789, -2.5102, -1.4949, -2.1167,
         -2.9784],
        [-2.9784, -2.9784, -2.9784, -2.1311, -1.3789, -2.5102, -1.4949, -2.1167,
         -2.9784],
        [-2.9784, -2.9784, -2.9784, -2.1311, -1.3789, -2.5102, -1.4949, -2.1167,
         -2.9784]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[3.0009e-04, 6.0275e-03, 6.0275e-03, 3.2909e-01, 3.2909e-01, 4.0613e-05,
         4.0613e-05, 3.0009e-04, 3.2909e-01],
        [1.2422e-02, 1.24

tensor([[-2.9791, -2.9791, -2.9791, -2.1319, -1.3784, -2.5099, -1.4939, -2.1180,
         -2.9791],
        [-2.9791, -2.9791, -2.9791, -2.1319, -1.3784, -2.5099, -1.4939, -2.1180,
         -2.9791],
        [-2.9791, -2.9791, -2.9791, -2.1319, -1.3784, -2.5099, -1.4939, -2.1180,
         -2.9791],
        [-2.9791, -2.9791, -2.9791, -2.1319, -1.3784, -2.5099, -1.4939, -2.1180,
         -2.9791],
        [-2.9791, -2.9791, -2.9791, -2.1319, -1.3784, -2.5099, -1.4939, -2.1180,
         -2.9791],
        [-2.9791, -2.9791, -2.9791, -2.1319, -1.3784, -2.5099, -1.4939, -2.1180,
         -2.9791],
        [-2.9791, -2.9791, -2.9791, -2.1319, -1.3784, -2.5099, -1.4939, -2.1180,
         -2.9791],
        [-2.9791, -2.9791, -2.9791, -2.1319, -1.3784, -2.5099, -1.4939, -2.1180,
         -2.9791]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[3.0413e-02, 1.1188e-02, 4.1159e-03, 8.2670e-02, 4.1159e-03, 1.5141e-03,
         3.0413e-02, 6.1085e-01, 2.2472e-01],
        [2.0392e-04, 1.50

tensor([[-2.9791, -2.9791, -2.9791, -2.1320, -1.3791, -2.5108, -1.4928, -2.1176,
         -2.9791],
        [-2.9791, -2.9791, -2.9791, -2.1320, -1.3791, -2.5108, -1.4928, -2.1176,
         -2.9791],
        [-2.9791, -2.9791, -2.9791, -2.1320, -1.3791, -2.5108, -1.4928, -2.1176,
         -2.9791],
        [-2.9791, -2.9791, -2.9791, -2.1320, -1.3791, -2.5108, -1.4928, -2.1176,
         -2.9791],
        [-2.9791, -2.9791, -2.9791, -2.1320, -1.3791, -2.5108, -1.4928, -2.1176,
         -2.9791],
        [-2.9791, -2.9791, -2.9791, -2.1320, -1.3791, -2.5108, -1.4928, -2.1176,
         -2.9791],
        [-2.9791, -2.9791, -2.9791, -2.1320, -1.3791, -2.5108, -1.4928, -2.1176,
         -2.9791],
        [-2.9791, -2.9791, -2.9791, -2.1320, -1.3791, -2.5108, -1.4928, -2.1176,
         -2.9791]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[8.2172e-08, 2.4495e-04, 2.2337e-07, 2.6862e-01, 3.3151e-05, 6.6585e-04,
         3.0229e-08, 2.4495e-04, 7.3019e-01],
        [8.9781e-04, 2.44

tensor([[-2.9794, -2.9794, -2.9794, -2.1313, -1.3792, -2.5111, -1.4925, -2.1181,
         -2.9794],
        [-2.9794, -2.9794, -2.9794, -2.1313, -1.3792, -2.5111, -1.4925, -2.1181,
         -2.9794],
        [-2.9794, -2.9794, -2.9794, -2.1313, -1.3792, -2.5111, -1.4925, -2.1181,
         -2.9794],
        [-2.9794, -2.9794, -2.9794, -2.1313, -1.3792, -2.5111, -1.4925, -2.1181,
         -2.9794],
        [-2.9794, -2.9794, -2.9794, -2.1313, -1.3792, -2.5111, -1.4925, -2.1181,
         -2.9794],
        [-2.9794, -2.9794, -2.9794, -2.1313, -1.3792, -2.5111, -1.4925, -2.1181,
         -2.9794],
        [-2.9794, -2.9794, -2.9794, -2.1313, -1.3792, -2.5111, -1.4925, -2.1181,
         -2.9794],
        [-2.9794, -2.9794, -2.9794, -2.1313, -1.3792, -2.5111, -1.4925, -2.1181,
         -2.9794]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[2.6571e-10, 5.3370e-09, 1.5909e-05, 4.7425e-02, 9.5255e-01, 2.6571e-10,
         1.9634e-09, 3.9435e-08, 5.8527e-06],
        [2.2299e-03, 6.06

tensor([[-2.9791, -2.9791, -2.9791, -2.1316, -1.3790, -2.5105, -1.4932, -2.1179,
         -2.9791],
        [-2.9791, -2.9791, -2.9791, -2.1316, -1.3790, -2.5105, -1.4932, -2.1179,
         -2.9791],
        [-2.9791, -2.9791, -2.9791, -2.1316, -1.3790, -2.5105, -1.4932, -2.1179,
         -2.9791],
        [-2.9791, -2.9791, -2.9791, -2.1316, -1.3790, -2.5105, -1.4932, -2.1179,
         -2.9791],
        [-2.9791, -2.9791, -2.9791, -2.1316, -1.3790, -2.5105, -1.4932, -2.1179,
         -2.9791],
        [-2.9791, -2.9791, -2.9791, -2.1316, -1.3790, -2.5105, -1.4932, -2.1179,
         -2.9791],
        [-2.9791, -2.9791, -2.9791, -2.1316, -1.3790, -2.5105, -1.4932, -2.1179,
         -2.9791],
        [-2.9791, -2.9791, -2.9791, -2.1316, -1.3790, -2.5105, -1.4932, -2.1179,
         -2.9791]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[2.3384e-03, 6.3564e-03, 1.7279e-02, 9.4338e-01, 6.3564e-03, 1.7279e-02,
         2.3384e-03, 2.3384e-03, 2.3384e-03],
        [1.8226e-02, 4.95

tensor([[-2.9796, -2.9796, -2.9796, -2.1318, -1.3784, -2.5116, -1.4928, -2.1183,
         -2.9796],
        [-2.9796, -2.9796, -2.9796, -2.1318, -1.3784, -2.5116, -1.4928, -2.1183,
         -2.9796],
        [-2.9796, -2.9796, -2.9796, -2.1318, -1.3784, -2.5116, -1.4928, -2.1183,
         -2.9796],
        [-2.9796, -2.9796, -2.9796, -2.1318, -1.3784, -2.5116, -1.4928, -2.1183,
         -2.9796],
        [-2.9796, -2.9796, -2.9796, -2.1318, -1.3784, -2.5116, -1.4928, -2.1183,
         -2.9796],
        [-2.9796, -2.9796, -2.9796, -2.1318, -1.3784, -2.5116, -1.4928, -2.1183,
         -2.9796],
        [-2.9796, -2.9796, -2.9796, -2.1318, -1.3784, -2.5116, -1.4928, -2.1183,
         -2.9796],
        [-2.9796, -2.9796, -2.9796, -2.1318, -1.3784, -2.5116, -1.4928, -2.1183,
         -2.9796]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[4.5614e-02, 2.2710e-03, 2.2710e-03, 2.2710e-03, 6.1731e-03, 1.6780e-02,
         9.1618e-01, 6.1731e-03, 2.2710e-03],
        [5.8141e-03, 8.62

tensor([[-2.9801, -2.9801, -2.9801, -2.1320, -1.3784, -2.5122, -1.4913, -2.1194,
         -2.9801],
        [-2.9801, -2.9801, -2.9801, -2.1320, -1.3784, -2.5122, -1.4913, -2.1194,
         -2.9801],
        [-2.9801, -2.9801, -2.9801, -2.1320, -1.3784, -2.5122, -1.4913, -2.1194,
         -2.9801],
        [-2.9801, -2.9801, -2.9801, -2.1320, -1.3784, -2.5122, -1.4913, -2.1194,
         -2.9801],
        [-2.9801, -2.9801, -2.9801, -2.1320, -1.3784, -2.5122, -1.4913, -2.1194,
         -2.9801],
        [-2.9801, -2.9801, -2.9801, -2.1320, -1.3784, -2.5122, -1.4913, -2.1194,
         -2.9801],
        [-2.9801, -2.9801, -2.9801, -2.1320, -1.3784, -2.5122, -1.4913, -2.1194,
         -2.9801],
        [-2.9801, -2.9801, -2.9801, -2.1320, -1.3784, -2.5122, -1.4913, -2.1194,
         -2.9801]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[1.2572e-02, 3.4173e-02, 1.7014e-03, 4.6249e-03, 1.7014e-03, 2.5251e-01,
         6.8639e-01, 4.6249e-03, 1.7014e-03],
        [4.8426e-04, 3.57

tensor([[-2.9800, -2.9800, -2.9800, -2.1323, -1.3791, -2.5121, -1.4912, -2.1180,
         -2.9800],
        [-2.9800, -2.9800, -2.9800, -2.1323, -1.3791, -2.5121, -1.4912, -2.1180,
         -2.9800],
        [-2.9800, -2.9800, -2.9800, -2.1323, -1.3791, -2.5121, -1.4912, -2.1180,
         -2.9800],
        [-2.9800, -2.9800, -2.9800, -2.1323, -1.3791, -2.5121, -1.4912, -2.1180,
         -2.9800],
        [-2.9800, -2.9800, -2.9800, -2.1323, -1.3791, -2.5121, -1.4912, -2.1180,
         -2.9800],
        [-2.9800, -2.9800, -2.9800, -2.1323, -1.3791, -2.5121, -1.4912, -2.1180,
         -2.9800],
        [-2.9800, -2.9800, -2.9800, -2.1323, -1.3791, -2.5121, -1.4912, -2.1180,
         -2.9800],
        [-2.9800, -2.9800, -2.9800, -2.1323, -1.3791, -2.5121, -1.4912, -2.1180,
         -2.9800]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[1.6299e-03, 2.4190e-01, 1.6299e-03, 5.9961e-04, 8.8991e-02, 4.4306e-03,
         1.6299e-03, 6.5756e-01, 1.6299e-03],
        [2.6571e-10, 5.33

tensor([[-2.9797, -2.9797, -2.9797, -2.1308, -1.3796, -2.5115, -1.4921, -2.1179,
         -2.9797],
        [-2.9797, -2.9797, -2.9797, -2.1308, -1.3796, -2.5115, -1.4921, -2.1179,
         -2.9797],
        [-2.9797, -2.9797, -2.9797, -2.1308, -1.3796, -2.5115, -1.4921, -2.1179,
         -2.9797],
        [-2.9797, -2.9797, -2.9797, -2.1308, -1.3796, -2.5115, -1.4921, -2.1179,
         -2.9797],
        [-2.9797, -2.9797, -2.9797, -2.1308, -1.3796, -2.5115, -1.4921, -2.1179,
         -2.9797],
        [-2.9797, -2.9797, -2.9797, -2.1308, -1.3796, -2.5115, -1.4921, -2.1179,
         -2.9797],
        [-2.9797, -2.9797, -2.9797, -2.1308, -1.3796, -2.5115, -1.4921, -2.1179,
         -2.9797],
        [-2.9797, -2.9797, -2.9797, -2.1308, -1.3796, -2.5115, -1.4921, -2.1179,
         -2.9797]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[8.2172e-08, 2.4495e-04, 2.2337e-07, 2.6862e-01, 3.3151e-05, 6.6585e-04,
         3.0229e-08, 2.4495e-04, 7.3019e-01],
        [3.8200e-02, 1.40

tensor([[-2.9798, -2.9798, -2.9798, -2.1311, -1.3804, -2.5099, -1.4912, -2.1184,
         -2.9798],
        [-2.9798, -2.9798, -2.9798, -2.1311, -1.3804, -2.5099, -1.4912, -2.1184,
         -2.9798],
        [-2.9798, -2.9798, -2.9798, -2.1311, -1.3804, -2.5099, -1.4912, -2.1184,
         -2.9798],
        [-2.9798, -2.9798, -2.9798, -2.1311, -1.3804, -2.5099, -1.4912, -2.1184,
         -2.9798],
        [-2.9798, -2.9798, -2.9798, -2.1311, -1.3804, -2.5099, -1.4912, -2.1184,
         -2.9798],
        [-2.9798, -2.9798, -2.9798, -2.1311, -1.3804, -2.5099, -1.4912, -2.1184,
         -2.9798],
        [-2.9798, -2.9798, -2.9798, -2.1311, -1.3804, -2.5099, -1.4912, -2.1184,
         -2.9798],
        [-2.9798, -2.9798, -2.9798, -2.1311, -1.3804, -2.5099, -1.4912, -2.1184,
         -2.9798]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[4.2270e-03, 4.2270e-03, 1.5550e-03, 4.2270e-03, 1.1490e-02, 2.3079e-01,
         6.2735e-01, 8.4902e-02, 3.1234e-02],
        [2.4385e-03, 3.61

tensor([[-2.9799, -2.9799, -2.9799, -2.1309, -1.3804, -2.5101, -1.4913, -2.1181,
         -2.9799],
        [-2.9799, -2.9799, -2.9799, -2.1309, -1.3804, -2.5101, -1.4913, -2.1181,
         -2.9799],
        [-2.9799, -2.9799, -2.9799, -2.1309, -1.3804, -2.5101, -1.4913, -2.1181,
         -2.9799],
        [-2.9799, -2.9799, -2.9799, -2.1309, -1.3804, -2.5101, -1.4913, -2.1181,
         -2.9799],
        [-2.9799, -2.9799, -2.9799, -2.1309, -1.3804, -2.5101, -1.4913, -2.1181,
         -2.9799],
        [-2.9799, -2.9799, -2.9799, -2.1309, -1.3804, -2.5101, -1.4913, -2.1181,
         -2.9799],
        [-2.9799, -2.9799, -2.9799, -2.1309, -1.3804, -2.5101, -1.4913, -2.1181,
         -2.9799],
        [-2.9799, -2.9799, -2.9799, -2.1309, -1.3804, -2.5101, -1.4913, -2.1181,
         -2.9799]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[5.3129e-02, 1.4442e-01, 9.7310e-04, 9.7310e-04, 7.1903e-03, 7.1903e-03,
         3.9258e-01, 3.9258e-01, 9.7310e-04],
        [6.5488e-04, 2.64

tensor([[-2.9798, -2.9798, -2.9798, -2.1313, -1.3803, -2.5102, -1.4919, -2.1168,
         -2.9798],
        [-2.9798, -2.9798, -2.9798, -2.1313, -1.3803, -2.5102, -1.4919, -2.1168,
         -2.9798],
        [-2.9798, -2.9798, -2.9798, -2.1313, -1.3803, -2.5102, -1.4919, -2.1168,
         -2.9798],
        [-2.9798, -2.9798, -2.9798, -2.1313, -1.3803, -2.5102, -1.4919, -2.1168,
         -2.9798],
        [-2.9798, -2.9798, -2.9798, -2.1313, -1.3803, -2.5102, -1.4919, -2.1168,
         -2.9798],
        [-2.9798, -2.9798, -2.9798, -2.1313, -1.3803, -2.5102, -1.4919, -2.1168,
         -2.9798],
        [-2.9798, -2.9798, -2.9798, -2.1313, -1.3803, -2.5102, -1.4919, -2.1168,
         -2.9798],
        [-2.9798, -2.9798, -2.9798, -2.1313, -1.3803, -2.5102, -1.4919, -2.1168,
         -2.9798]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[1.6299e-03, 2.4190e-01, 1.6299e-03, 5.9961e-04, 8.8991e-02, 4.4306e-03,
         1.6299e-03, 6.5756e-01, 1.6299e-03],
        [5.8141e-03, 8.62

tensor([[-2.9802, -2.9802, -2.9802, -2.1313, -1.3812, -2.5102, -1.4897, -2.1183,
         -2.9802],
        [-2.9802, -2.9802, -2.9802, -2.1313, -1.3812, -2.5102, -1.4897, -2.1183,
         -2.9802],
        [-2.9802, -2.9802, -2.9802, -2.1313, -1.3812, -2.5102, -1.4897, -2.1183,
         -2.9802],
        [-2.9802, -2.9802, -2.9802, -2.1313, -1.3812, -2.5102, -1.4897, -2.1183,
         -2.9802],
        [-2.9802, -2.9802, -2.9802, -2.1313, -1.3812, -2.5102, -1.4897, -2.1183,
         -2.9802],
        [-2.9802, -2.9802, -2.9802, -2.1313, -1.3812, -2.5102, -1.4897, -2.1183,
         -2.9802],
        [-2.9802, -2.9802, -2.9802, -2.1313, -1.3812, -2.5102, -1.4897, -2.1183,
         -2.9802],
        [-2.9802, -2.9802, -2.9802, -2.1313, -1.3812, -2.5102, -1.4897, -2.1183,
         -2.9802]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[2.0948e-02, 1.5479e-01, 2.0948e-02, 5.6943e-02, 4.2076e-01, 1.5479e-01,
         5.6943e-02, 5.6943e-02, 5.6943e-02],
        [2.9075e-04, 1.17

tensor([[-2.9803, -2.9803, -2.9803, -2.1315, -1.3814, -2.5098, -1.4897, -2.1180,
         -2.9803],
        [-2.9803, -2.9803, -2.9803, -2.1315, -1.3814, -2.5098, -1.4897, -2.1180,
         -2.9803],
        [-2.9803, -2.9803, -2.9803, -2.1315, -1.3814, -2.5098, -1.4897, -2.1180,
         -2.9803],
        [-2.9803, -2.9803, -2.9803, -2.1315, -1.3814, -2.5098, -1.4897, -2.1180,
         -2.9803],
        [-2.9803, -2.9803, -2.9803, -2.1315, -1.3814, -2.5098, -1.4897, -2.1180,
         -2.9803],
        [-2.9803, -2.9803, -2.9803, -2.1315, -1.3814, -2.5098, -1.4897, -2.1180,
         -2.9803],
        [-2.9803, -2.9803, -2.9803, -2.1315, -1.3814, -2.5098, -1.4897, -2.1180,
         -2.9803],
        [-2.9803, -2.9803, -2.9803, -2.1315, -1.3814, -2.5098, -1.4897, -2.1180,
         -2.9803]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[2.3384e-03, 6.3564e-03, 1.7279e-02, 9.4338e-01, 6.3564e-03, 1.7279e-02,
         2.3384e-03, 2.3384e-03, 2.3384e-03],
        [5.2149e-04, 3.85

tensor([[-2.9803, -2.9803, -2.9803, -2.1315, -1.3812, -2.5099, -1.4897, -2.1181,
         -2.9803],
        [-2.9803, -2.9803, -2.9803, -2.1315, -1.3812, -2.5099, -1.4897, -2.1181,
         -2.9803],
        [-2.9803, -2.9803, -2.9803, -2.1315, -1.3812, -2.5099, -1.4897, -2.1181,
         -2.9803],
        [-2.9803, -2.9803, -2.9803, -2.1315, -1.3812, -2.5099, -1.4897, -2.1181,
         -2.9803],
        [-2.9803, -2.9803, -2.9803, -2.1315, -1.3812, -2.5099, -1.4897, -2.1181,
         -2.9803],
        [-2.9803, -2.9803, -2.9803, -2.1315, -1.3812, -2.5099, -1.4897, -2.1181,
         -2.9803],
        [-2.9803, -2.9803, -2.9803, -2.1315, -1.3812, -2.5099, -1.4897, -2.1181,
         -2.9803],
        [-2.9803, -2.9803, -2.9803, -2.1315, -1.3812, -2.5099, -1.4897, -2.1181,
         -2.9803]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[9.0172e-04, 6.6628e-03, 4.4894e-05, 9.0172e-04, 2.4511e-03, 1.2203e-04,
         9.8885e-01, 4.4894e-05, 1.6516e-05],
        [5.5558e-03, 5.55

tensor([[-2.9803, -2.9803, -2.9803, -2.1314, -1.3813, -2.5099, -1.4898, -2.1180,
         -2.9803],
        [-2.9803, -2.9803, -2.9803, -2.1314, -1.3813, -2.5099, -1.4898, -2.1180,
         -2.9803],
        [-2.9803, -2.9803, -2.9803, -2.1314, -1.3813, -2.5099, -1.4898, -2.1180,
         -2.9803],
        [-2.9803, -2.9803, -2.9803, -2.1314, -1.3813, -2.5099, -1.4898, -2.1180,
         -2.9803],
        [-2.9803, -2.9803, -2.9803, -2.1314, -1.3813, -2.5099, -1.4898, -2.1180,
         -2.9803],
        [-2.9803, -2.9803, -2.9803, -2.1314, -1.3813, -2.5099, -1.4898, -2.1180,
         -2.9803],
        [-2.9803, -2.9803, -2.9803, -2.1314, -1.3813, -2.5099, -1.4898, -2.1180,
         -2.9803],
        [-2.9803, -2.9803, -2.9803, -2.1314, -1.3813, -2.5099, -1.4898, -2.1180,
         -2.9803]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[9.0557e-04, 2.4616e-03, 1.2256e-04, 4.5086e-05, 6.1017e-06, 2.4616e-03,
         9.9308e-01, 9.0557e-04, 1.6586e-05],
        [2.0392e-04, 1.50

tensor([[-2.9804, -2.9804, -2.9804, -2.1314, -1.3813, -2.5099, -1.4898, -2.1179,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3813, -2.5099, -1.4898, -2.1179,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3813, -2.5099, -1.4898, -2.1179,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3813, -2.5099, -1.4898, -2.1179,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3813, -2.5099, -1.4898, -2.1179,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3813, -2.5099, -1.4898, -2.1179,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3813, -2.5099, -1.4898, -2.1179,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3813, -2.5099, -1.4898, -2.1179,
         -2.9804]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[8.8802e-05, 8.8802e-05, 1.7836e-03, 2.6472e-01, 7.1957e-01, 2.4139e-04,
         8.8802e-05, 2.4139e-04, 1.3179e-02],
        [1.5031e-02, 2.03

tensor([[-2.9804, -2.9804, -2.9804, -2.1314, -1.3813, -2.5099, -1.4897, -2.1180,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3813, -2.5099, -1.4897, -2.1180,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3813, -2.5099, -1.4897, -2.1180,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3813, -2.5099, -1.4897, -2.1180,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3813, -2.5099, -1.4897, -2.1180,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3813, -2.5099, -1.4897, -2.1180,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3813, -2.5099, -1.4897, -2.1180,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3813, -2.5099, -1.4897, -2.1180,
         -2.9804]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[5.2149e-04, 3.8533e-03, 1.4175e-03, 5.2149e-04, 5.2149e-04, 2.1038e-01,
         5.7188e-01, 2.1038e-01, 5.2149e-04],
        [5.8577e-03, 5.85

tensor([[-2.9804, -2.9804, -2.9804, -2.1314, -1.3814, -2.5100, -1.4896, -2.1179,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3814, -2.5100, -1.4896, -2.1179,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3814, -2.5100, -1.4896, -2.1179,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3814, -2.5100, -1.4896, -2.1179,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3814, -2.5100, -1.4896, -2.1179,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3814, -2.5100, -1.4896, -2.1179,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3814, -2.5100, -1.4896, -2.1179,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3814, -2.5100, -1.4896, -2.1179,
         -2.9804]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[8.1559e-04, 2.2170e-03, 8.1559e-04, 8.1559e-04, 6.0264e-03, 3.2903e-01,
         3.2903e-01, 3.2903e-01, 2.2170e-03],
        [8.9807e-05, 7.27

tensor([[-2.9804, -2.9804, -2.9804, -2.1315, -1.3814, -2.5100, -1.4896, -2.1180,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1315, -1.3814, -2.5100, -1.4896, -2.1180,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1315, -1.3814, -2.5100, -1.4896, -2.1180,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1315, -1.3814, -2.5100, -1.4896, -2.1180,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1315, -1.3814, -2.5100, -1.4896, -2.1180,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1315, -1.3814, -2.5100, -1.4896, -2.1180,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1315, -1.3814, -2.5100, -1.4896, -2.1180,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1315, -1.3814, -2.5100, -1.4896, -2.1180,
         -2.9804]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[4.1297e-08, 2.0561e-09, 2.5374e-13, 6.8973e-13, 2.5374e-13, 2.4726e-03,
         9.9753e-01, 2.0561e-09, 2.5374e-13],
        [2.1386e-02, 2.89

tensor([[-2.9804, -2.9804, -2.9804, -2.1314, -1.3814, -2.5100, -1.4896, -2.1180,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3814, -2.5100, -1.4896, -2.1180,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3814, -2.5100, -1.4896, -2.1180,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3814, -2.5100, -1.4896, -2.1180,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3814, -2.5100, -1.4896, -2.1180,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3814, -2.5100, -1.4896, -2.1180,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3814, -2.5100, -1.4896, -2.1180,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3814, -2.5100, -1.4896, -2.1180,
         -2.9804]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[4.6800e-02, 4.6800e-02, 6.3337e-03, 4.6800e-02, 1.7217e-02, 1.7217e-02,
         3.4581e-01, 3.4581e-01, 1.2722e-01],
        [2.3384e-03, 6.35

tensor([[-2.9804, -2.9804, -2.9804, -2.1314, -1.3814, -2.5100, -1.4894, -2.1180,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3814, -2.5100, -1.4894, -2.1180,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3814, -2.5100, -1.4894, -2.1180,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3814, -2.5100, -1.4894, -2.1180,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3814, -2.5100, -1.4894, -2.1180,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3814, -2.5100, -1.4894, -2.1180,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3814, -2.5100, -1.4894, -2.1180,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3814, -2.5100, -1.4894, -2.1180,
         -2.9804]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[5.7321e-03, 2.1087e-03, 2.1087e-03, 5.7321e-03, 2.1087e-03, 1.5581e-02,
         1.1513e-01, 8.5072e-01, 7.7576e-04],
        [2.6425e-01, 2.40

tensor([[-2.9804, -2.9804, -2.9804, -2.1314, -1.3814, -2.5099, -1.4895, -2.1181,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3814, -2.5099, -1.4895, -2.1181,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3814, -2.5099, -1.4895, -2.1181,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3814, -2.5099, -1.4895, -2.1181,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3814, -2.5099, -1.4895, -2.1181,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3814, -2.5099, -1.4895, -2.1181,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3814, -2.5099, -1.4895, -2.1181,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3814, -2.5099, -1.4895, -2.1181,
         -2.9804]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[9.1038e-04, 1.2321e-04, 3.3491e-04, 1.2321e-04, 4.5325e-05, 4.5325e-05,
         9.9836e-01, 1.6674e-05, 4.5325e-05],
        [2.7598e-11, 2.75

tensor([[-2.9804, -2.9804, -2.9804, -2.1314, -1.3814, -2.5100, -1.4895, -2.1180,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3814, -2.5100, -1.4895, -2.1180,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3814, -2.5100, -1.4895, -2.1180,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3814, -2.5100, -1.4895, -2.1180,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3814, -2.5100, -1.4895, -2.1180,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3814, -2.5100, -1.4895, -2.1180,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3814, -2.5100, -1.4895, -2.1180,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3814, -2.5100, -1.4895, -2.1180,
         -2.9804]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[5.9991e-03, 1.6307e-02, 5.9991e-03, 1.6307e-02, 8.9034e-01, 2.2069e-03,
         2.2069e-03, 1.6307e-02, 4.4327e-02],
        [8.9781e-04, 2.44

tensor([[-2.9805, -2.9805, -2.9805, -2.1314, -1.3814, -2.5101, -1.4894, -2.1182,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1314, -1.3814, -2.5101, -1.4894, -2.1182,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1314, -1.3814, -2.5101, -1.4894, -2.1182,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1314, -1.3814, -2.5101, -1.4894, -2.1182,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1314, -1.3814, -2.5101, -1.4894, -2.1182,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1314, -1.3814, -2.5101, -1.4894, -2.1182,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1314, -1.3814, -2.5101, -1.4894, -2.1182,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1314, -1.3814, -2.5101, -1.4894, -2.1182,
         -2.9805]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[8.3016e-04, 6.1341e-03, 1.6674e-02, 6.1341e-03, 6.1341e-03, 2.2566e-03,
         4.5325e-02, 9.1038e-01, 6.1341e-03],
        [8.1559e-04, 2.21

tensor([[-2.9805, -2.9805, -2.9805, -2.1315, -1.3814, -2.5102, -1.4892, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1315, -1.3814, -2.5102, -1.4892, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1315, -1.3814, -2.5102, -1.4892, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1315, -1.3814, -2.5102, -1.4892, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1315, -1.3814, -2.5102, -1.4892, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1315, -1.3814, -2.5102, -1.4892, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1315, -1.3814, -2.5102, -1.4892, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1315, -1.3814, -2.5102, -1.4892, -2.1183,
         -2.9805]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[2.3384e-03, 6.3564e-03, 1.7279e-02, 9.4338e-01, 6.3564e-03, 1.7279e-02,
         2.3384e-03, 2.3384e-03, 2.3384e-03],
        [4.5614e-02, 2.27

tensor([[-2.9805, -2.9805, -2.9805, -2.1316, -1.3813, -2.5101, -1.4892, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1316, -1.3813, -2.5101, -1.4892, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1316, -1.3813, -2.5101, -1.4892, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1316, -1.3813, -2.5101, -1.4892, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1316, -1.3813, -2.5101, -1.4892, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1316, -1.3813, -2.5101, -1.4892, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1316, -1.3813, -2.5101, -1.4892, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1316, -1.3813, -2.5101, -1.4892, -2.1183,
         -2.9805]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[1.5286e-01, 7.6104e-03, 4.1552e-01, 1.5286e-01, 5.6234e-02, 2.0687e-02,
         1.5286e-01, 2.0687e-02, 2.0687e-02],
        [6.1240e-06, 3.34

tensor([[-2.9805, -2.9805, -2.9805, -2.1314, -1.3814, -2.5100, -1.4893, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1314, -1.3814, -2.5100, -1.4893, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1314, -1.3814, -2.5100, -1.4893, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1314, -1.3814, -2.5100, -1.4893, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1314, -1.3814, -2.5100, -1.4893, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1314, -1.3814, -2.5100, -1.4893, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1314, -1.3814, -2.5100, -1.4893, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1314, -1.3814, -2.5100, -1.4893, -2.1183,
         -2.9805]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[7.4267e-03, 2.7321e-03, 7.4267e-03, 7.4267e-03, 7.4267e-03, 4.0548e-01,
         4.0548e-01, 1.4917e-01, 7.4267e-03],
        [2.6425e-01, 2.40

tensor([[-2.9805, -2.9805, -2.9805, -2.1314, -1.3815, -2.5101, -1.4892, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1314, -1.3815, -2.5101, -1.4892, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1314, -1.3815, -2.5101, -1.4892, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1314, -1.3815, -2.5101, -1.4892, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1314, -1.3815, -2.5101, -1.4892, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1314, -1.3815, -2.5101, -1.4892, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1314, -1.3815, -2.5101, -1.4892, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1314, -1.3815, -2.5101, -1.4892, -2.1183,
         -2.9805]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[8.2172e-08, 2.4495e-04, 2.2337e-07, 2.6862e-01, 3.3151e-05, 6.6585e-04,
         3.0229e-08, 2.4495e-04, 7.3019e-01],
        [1.0373e-03, 2.81

tensor([[-2.9804, -2.9804, -2.9804, -2.1315, -1.3816, -2.5100, -1.4892, -2.1182,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1315, -1.3816, -2.5100, -1.4892, -2.1182,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1315, -1.3816, -2.5100, -1.4892, -2.1182,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1315, -1.3816, -2.5100, -1.4892, -2.1182,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1315, -1.3816, -2.5100, -1.4892, -2.1182,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1315, -1.3816, -2.5100, -1.4892, -2.1182,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1315, -1.3816, -2.5100, -1.4892, -2.1182,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1315, -1.3816, -2.5100, -1.4892, -2.1182,
         -2.9804]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[5.3129e-02, 1.4442e-01, 9.7310e-04, 9.7310e-04, 7.1903e-03, 7.1903e-03,
         3.9258e-01, 3.9258e-01, 9.7310e-04],
        [1.2572e-02, 3.41

tensor([[-2.9804, -2.9804, -2.9804, -2.1315, -1.3815, -2.5102, -1.4891, -2.1182,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1315, -1.3815, -2.5102, -1.4891, -2.1182,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1315, -1.3815, -2.5102, -1.4891, -2.1182,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1315, -1.3815, -2.5102, -1.4891, -2.1182,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1315, -1.3815, -2.5102, -1.4891, -2.1182,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1315, -1.3815, -2.5102, -1.4891, -2.1182,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1315, -1.3815, -2.5102, -1.4891, -2.1182,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1315, -1.3815, -2.5102, -1.4891, -2.1182,
         -2.9804]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[1.0622e-01, 2.8872e-01, 7.1568e-04, 1.9454e-03, 5.2882e-03, 2.8872e-01,
         1.4375e-02, 2.8872e-01, 5.2882e-03],
        [1.5031e-02, 2.03

tensor([[-2.9804, -2.9804, -2.9804, -2.1314, -1.3815, -2.5101, -1.4892, -2.1183,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3815, -2.5101, -1.4892, -2.1183,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3815, -2.5101, -1.4892, -2.1183,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3815, -2.5101, -1.4892, -2.1183,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3815, -2.5101, -1.4892, -2.1183,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3815, -2.5101, -1.4892, -2.1183,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3815, -2.5101, -1.4892, -2.1183,
         -2.9804],
        [-2.9804, -2.9804, -2.9804, -2.1314, -1.3815, -2.5101, -1.4892, -2.1183,
         -2.9804]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[2.4385e-03, 3.6190e-01, 6.6285e-03, 3.6190e-01, 1.3314e-01, 4.8978e-02,
         4.8978e-02, 1.8018e-02, 1.8018e-02],
        [4.3995e-03, 8.83

tensor([[-2.9805, -2.9805, -2.9805, -2.1313, -1.3815, -2.5101, -1.4892, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3815, -2.5101, -1.4892, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3815, -2.5101, -1.4892, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3815, -2.5101, -1.4892, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3815, -2.5101, -1.4892, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3815, -2.5101, -1.4892, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3815, -2.5101, -1.4892, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3815, -2.5101, -1.4892, -2.1183,
         -2.9805]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[9.0172e-04, 6.6628e-03, 4.4894e-05, 9.0172e-04, 2.4511e-03, 1.2203e-04,
         9.8885e-01, 4.4894e-05, 1.6516e-05],
        [4.1297e-08, 2.05

tensor([[-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[9.0172e-04, 6.6628e-03, 4.4894e-05, 9.0172e-04, 2.4511e-03, 1.2203e-04,
         9.8885e-01, 4.4894e-05, 1.6516e-05],
        [9.1038e-04, 1.23

tensor([[-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[3.0413e-02, 1.1188e-02, 4.1159e-03, 8.2670e-02, 4.1159e-03, 1.5141e-03,
         3.0413e-02, 6.1085e-01, 2.2472e-01],
        [5.5011e-09, 1.79

tensor([[-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[1.4632e-02, 1.0812e-01, 1.9802e-03, 1.4632e-02, 5.3828e-03, 3.9774e-02,
         1.9802e-03, 7.9887e-01, 1.4632e-02],
        [2.9075e-04, 1.17

tensor([[-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[1.6299e-03, 2.4190e-01, 1.6299e-03, 5.9961e-04, 8.8991e-02, 4.4306e-03,
         1.6299e-03, 6.5756e-01, 1.6299e-03],
        [9.0172e-04, 6.66

tensor([[-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[1.4632e-02, 1.0812e-01, 1.9802e-03, 1.4632e-02, 5.3828e-03, 3.9774e-02,
         1.9802e-03, 7.9887e-01, 1.4632e-02],
        [1.0622e-01, 2.88

tensor([[-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[2.1386e-02, 2.8942e-03, 2.1386e-02, 4.2954e-01, 7.8673e-03, 5.8132e-02,
         4.2954e-01, 2.1386e-02, 7.8673e-03],
        [2.6425e-01, 2.40

tensor([[-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[1.5031e-02, 2.0342e-03, 8.2066e-01, 4.0858e-02, 1.1106e-01, 7.4835e-04,
         5.5296e-03, 2.0342e-03, 2.0342e-03],
        [1.5633e-04, 3.13

tensor([[-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[1.5633e-04, 3.1399e-03, 1.1551e-03, 6.3067e-02, 4.6601e-01, 1.5633e-04,
         1.5633e-04, 1.5633e-04, 4.6601e-01],
        [8.2172e-08, 2.44

tensor([[-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[6.1423e-06, 9.9968e-01, 8.3127e-07, 1.2337e-04, 1.2337e-04, 5.6010e-09,
         4.5386e-05, 1.6696e-05, 8.3127e-07],
        [1.2572e-02, 3.41

tensor([[-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[1.4632e-02, 1.0812e-01, 1.9802e-03, 1.4632e-02, 5.3828e-03, 3.9774e-02,
         1.9802e-03, 7.9887e-01, 1.4632e-02],
        [5.1867e-03, 1.90

tensor([[-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[6.5488e-04, 2.6420e-01, 8.8628e-05, 6.5488e-04, 1.7801e-03, 7.1816e-01,
         6.5488e-04, 1.3154e-02, 6.5488e-04],
        [9.1038e-04, 1.23

tensor([[-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[2.2200e-03, 2.2200e-03, 4.4590e-02, 8.9561e-01, 4.4590e-02, 2.2200e-03,
         2.2200e-03, 3.0044e-04, 6.0345e-03],
        [5.7718e-03, 5.77

tensor([[-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[1.5226e-08, 7.5803e-10, 4.5387e-05, 9.9971e-01, 1.2337e-04, 3.7740e-11,
         1.2337e-04, 7.5803e-10, 2.0606e-09],
        [9.1038e-04, 1.23

tensor([[-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[4.1966e-03, 8.4290e-02, 1.5438e-03, 2.2912e-01, 6.2282e-01, 4.1966e-03,
         1.1407e-02, 1.1407e-02, 3.1009e-02],
        [5.6441e-05, 4.17

tensor([[-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[2.7393e-10, 5.0172e-12, 9.8201e-01, 1.7986e-02, 3.0040e-07, 6.7900e-13,
         1.0077e-10, 5.0172e-12, 1.1051e-07],
        [6.1240e-06, 3.34

tensor([[-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[2.1386e-02, 2.8942e-03, 2.1386e-02, 4.2954e-01, 7.8673e-03, 5.8132e-02,
         4.2954e-01, 2.1386e-02, 7.8673e-03],
        [2.3384e-03, 6.35

tensor([[-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[1.5031e-02, 2.0342e-03, 8.2066e-01, 4.0858e-02, 1.1106e-01, 7.4835e-04,
         5.5296e-03, 2.0342e-03, 2.0342e-03],
        [5.8141e-03, 8.62

tensor([[-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[8.3151e-07, 2.0611e-09, 3.7751e-11, 2.0611e-09, 1.6701e-05, 1.1253e-07,
         9.9998e-01, 2.2603e-06, 3.7751e-11],
        [7.5794e-10, 4.53

tensor([[-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[2.2368e-03, 4.4927e-02, 8.2286e-04, 2.2368e-03, 4.4927e-02, 8.2286e-04,
         8.2286e-04, 9.0238e-01, 8.2286e-04],
        [2.1386e-02, 2.89

tensor([[-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805],
        [-2.9805, -2.9805, -2.9805, -2.1313, -1.3816, -2.5100, -1.4891, -2.1183,
         -2.9805]], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
tensor([[3.8200e-02, 1.4053e-02, 7.6726e-01, 1.0384e-01, 3.8200e-02, 5.1698e-03,
         1.4053e-02, 5.1698e-03, 1.4053e-02],
        [5.5995e-09, 6.14

In [None]:
# Print model's state_dict
print("Model's state_dict:")
for param_tensor in model.state_dict():
    print(param_tensor, "\t", model.state_dict()[param_tensor].size())
# Print optimizer's state_dict
print("Optimizer's state_dict:")
for var_name in optimizer.state_dict():
    print(var_name, "\t", optimizer.state_dict()[var_name])
torch.save(model.state_dict(), 'dataset_model_soundemotion.pt')

## NOTES

below is the mfccs notes / random code

In [13]:
audio, sample_rate = librosa.load("SongEmotionDataset/1.mp3", res_type='kaiser_fast')
# [print(x) for x in audio]

#convert audio into 2d array
mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
# mfccsscaled = np.mean(mfccs.T,axis=0)
print(mfccs.shape, audio.shape)
mfccs



(40, 2586) (1323648,)


array([[-5.30341797e+02, -4.07741577e+02, -3.27536621e+02, ...,
        -2.39811523e+02, -1.96744080e+02, -1.44711777e+02],
       [ 5.81265569e-01,  1.03006027e+02,  1.29354553e+02, ...,
         1.48707626e+02,  1.45873001e+02,  1.28202530e+02],
       [ 4.58764762e-01,  7.53921986e+00, -1.18814125e+01, ...,
        -2.51551704e+01, -1.92207527e+01, -1.79366188e+01],
       ...,
       [ 3.11299562e-01, -1.29907084e+00,  1.18818974e+00, ...,
        -6.58579540e+00, -3.34302998e+00, -4.75482178e+00],
       [ 2.23848164e-01, -3.19489312e+00, -2.78556681e+00, ...,
        -1.36089420e+01, -6.40699673e+00, -5.27228928e+00],
       [ 8.67742151e-02,  1.31472754e+00, -1.41885233e+00, ...,
         3.34440261e-01,  1.14392626e+00, -3.62402201e-02]], dtype=float32)

In [10]:
# audio_tensor = torch.tensor(audio)
# audio_tensor
# audio_tensor.shape

In [16]:
# for sound_file in data_path.iterdir():
#     if ".mp3" in str(sound_file):
#         print(sound_file)
#         audio, sample_rate = librosa.load(str(sound_file), res_type='kaiser_fast')
        
    