In [20]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
import torch.nn.functional as F
from torch.utils.data import DataLoader
%matplotlib inline
%config InlineBackend.figure_format='retina'
print('Pytorch version :', torch.__version__)
device = torch.device('cuda:0' if torch.cuda.is_available else 'cpu')
print('device :', device)

Pytorch version : 1.7.1
device : cuda:0


In [6]:
train_data = datasets.MNIST('./data',train=True,transform=transforms.ToTensor(),download=False)
test_data = datasets.MNIST('./data',train=False,transform=transforms.ToTensor(),download=False)

In [8]:
BATCH_SIZE = 256
train_iter = DataLoader(train_data,batch_size=BATCH_SIZE,shuffle=True,num_workers=1)
test_iter = DataLoader(test_data,batch_size=BATCH_SIZE,shuffle=True,num_workers=1)

In [21]:
class MLP(nn.Module):
    def __init__(self, x_dim, h_dim, y_dim):
        super(MLP, self).__init__()
        self.x_dim = x_dim
        self.h_dim = h_dim
        self.y_dim = y_dim
        self.lin_1 = nn.Linear(x_dim, h_dim)
        self.lin_2 = nn.Linear(h_dim, y_dim)
        self.init_param()
        
    def init_param(self):
        for name, param in self.named_parameters():
            if 'weight' in name:
                nn.init.kaiming_normal_(param)
            else:
                nn.init.zeros_(param)
    
    def forward(self, x):
        x = self.lin_2(F.relu(self.lin_1(x)))
        return nn.Softmax(x)

In [26]:
model = MLP(x_dim=784, h_dim=256, y_dim=10).to(device)

In [27]:
for name, param in model.named_parameters():
    print(name, param)

lin_1.weight Parameter containing:
tensor([[ 0.0550,  0.0130, -0.0717,  ..., -0.0430,  0.0790, -0.0781],
        [-0.0472, -0.0671, -0.1243,  ..., -0.0626,  0.0265,  0.0170],
        [ 0.0197,  0.0420, -0.0035,  ..., -0.0070,  0.0761, -0.0688],
        ...,
        [ 0.0074,  0.0235,  0.0645,  ...,  0.0329,  0.1075, -0.0202],
        [ 0.0350, -0.0392, -0.0007,  ..., -0.0509,  0.0385, -0.0209],
        [ 0.0231, -0.0434,  0.0342,  ...,  0.0130, -0.0198,  0.0524]],
       device='cuda:0', requires_grad=True)
lin_1.bias Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0.

In [28]:
def evaluation(data_iter,model,device):
    for batch_in, batch_label in data_iter:
        x = batch_in.view(-1,28*28).to(device)
        y = batch_label.to(device)
        y_predict = model.forward(x)
        _, indices = torch.max(y_predict, dim=-1)
        

In [29]:
evaluation(train_iter,model,device)

Softmax(
  dim=tensor([[-0.6838,  0.3310,  0.5917,  ..., -0.0870,  0.6486, -0.0334],
          [-0.3283,  0.6309,  0.3565,  ...,  0.0298,  0.6258, -0.2862],
          [-0.7445,  0.8789,  0.7630,  ...,  0.2059,  0.8048,  0.3830],
          ...,
          [-1.0277,  0.9990,  0.3657,  ...,  0.1947,  0.2932, -0.0283],
          [-0.3907,  0.2720,  0.7960,  ...,  0.0204,  0.0867,  0.2598],
          [-1.0766,  0.1647,  0.5466,  ..., -0.4222,  0.7864, -0.1261]],
         device='cuda:0', grad_fn=<AddmmBackward>)
) tensor([9, 6, 6, 4, 8, 8, 9, 1, 6, 4, 9, 9, 1, 9, 5, 5, 9, 2, 1, 5, 1, 9, 9, 3,
        6, 8, 5, 2, 4, 3, 6, 2, 3, 5, 7, 1, 9, 2, 3, 9, 8, 9, 2, 8, 3, 5, 8, 1,
        4, 3, 5, 3, 7, 3, 0, 4, 7, 1, 9, 1, 9, 8, 1, 2, 6, 2, 7, 0, 7, 5, 8, 3,
        1, 7, 3, 3, 6, 0, 2, 6, 2, 6, 9, 0, 9, 8, 3, 8, 0, 0, 0, 6, 7, 7, 0, 8,
        4, 4, 4, 6, 6, 4, 9, 5, 0, 4, 5, 3, 4, 6, 1, 8, 1, 1, 8, 0, 2, 8, 3, 6,
        4, 3, 0, 3, 0, 4, 6, 2, 6, 7, 0, 5, 1, 0, 9, 7, 9, 9, 6, 6, 0, 8, 1, 4,
      

) tensor([0, 4, 6, 2, 7, 1, 5, 7, 3, 5, 3, 0, 8, 0, 4, 9, 4, 3, 1, 0, 2, 6, 2, 7,
        0, 9, 1, 7, 8, 5, 6, 9, 4, 3, 8, 8, 5, 6, 8, 9, 3, 6, 3, 4, 9, 6, 1, 9,
        5, 2, 2, 0, 1, 4, 0, 1, 4, 4, 9, 4, 6, 8, 7, 2, 4, 2, 2, 3, 1, 1, 0, 2,
        3, 2, 0, 5, 9, 4, 7, 5, 7, 4, 9, 8, 3, 5, 7, 9, 3, 1, 4, 6, 6, 3, 2, 6,
        8, 6, 9, 0, 1, 7, 5, 4, 8, 0, 9, 9, 0, 5, 2, 6, 9, 0, 8, 5, 8, 0, 9, 8,
        4, 5, 4, 8, 8, 0, 3, 9, 8, 9, 2, 4, 3, 6, 5, 5, 1, 4, 9, 6, 0, 5, 3, 1,
        8, 6, 7, 8, 4, 1, 0, 9, 3, 8, 2, 1, 7, 3, 2, 9, 8, 6, 6, 4, 0, 8, 6, 8,
        6, 8, 3, 0, 5, 3, 0, 5, 8, 2, 8, 3, 6, 1, 7, 0, 8, 0, 1, 9, 7, 6, 9, 3,
        3, 5, 7, 1, 1, 2, 7, 8, 9, 6, 2, 2, 5, 1, 5, 6, 1, 8, 9, 6, 5, 5, 6, 8,
        5, 6, 6, 4, 5, 8, 9, 0, 3, 0, 6, 9, 1, 9, 8, 5, 8, 7, 5, 2, 2, 0, 1, 4,
        5, 0, 4, 3, 7, 7, 2, 7, 9, 6, 5, 2, 7, 8, 6, 9], device='cuda:0')
Softmax(
  dim=tensor([[-0.7500,  0.8598,  0.4259,  ...,  0.5049,  0.3378, -0.1787],
          [-0.4837,  0.8943,  0.1937,  

) tensor([5, 5, 7, 0, 2, 0, 5, 3, 7, 0, 3, 3, 2, 3, 7, 4, 2, 4, 8, 7, 7, 0, 3, 0,
        5, 8, 6, 1, 5, 2, 2, 9, 5, 9, 1, 9, 2, 4, 4, 6, 9, 1, 2, 2, 6, 5, 4, 3,
        4, 3, 9, 4, 6, 2, 1, 2, 3, 9, 8, 9, 6, 3, 4, 7, 8, 0, 1, 1, 3, 4, 6, 8,
        1, 9, 7, 8, 4, 7, 7, 0, 4, 9, 1, 5, 5, 5, 4, 2, 9, 8, 1, 7, 4, 6, 4, 9,
        2, 1, 7, 9, 4, 8, 5, 5, 2, 2, 3, 2, 0, 9, 3, 5, 6, 8, 5, 8, 0, 5, 8, 2,
        2, 1, 1, 6, 3, 9, 7, 9, 9, 7, 7, 3, 2, 9, 6, 5, 4, 5, 8, 4, 5, 3, 8, 2,
        8, 2, 5, 8, 8, 1, 4, 8, 6, 4, 0, 6, 4, 9, 3, 4, 9, 7, 1, 7, 9, 0, 4, 0,
        6, 4, 1, 2, 2, 8, 3, 3, 7, 4, 9, 6, 2, 9, 0, 2, 7, 2, 3, 2, 9, 1, 9, 3,
        5, 2, 2, 4, 7, 3, 6, 9, 9, 3, 3, 4, 2, 4, 6, 6, 7, 1, 3, 1, 1, 2, 3, 1,
        7, 0, 8, 1, 7, 0, 5, 1, 9, 0, 0, 2, 2, 8, 4, 8, 7, 2, 4, 1, 7, 3, 3, 6,
        1, 8, 6, 3, 9, 0, 3, 5, 2, 8, 0, 6, 3, 1, 5, 5], device='cuda:0')
Softmax(
  dim=tensor([[-0.5765,  0.6879,  0.5045,  ...,  0.3641,  0.9477, -0.1055],
          [-0.2793,  0.3467,  0.4879,  

) tensor([2, 5, 6, 3, 9, 7, 4, 2, 3, 1, 5, 6, 1, 5, 9, 9, 4, 9, 4, 6, 9, 4, 2, 4,
        3, 3, 7, 1, 3, 1, 4, 1, 3, 1, 2, 0, 3, 6, 8, 0, 8, 6, 4, 4, 0, 2, 0, 0,
        9, 6, 4, 2, 0, 2, 9, 2, 1, 0, 6, 1, 2, 1, 4, 3, 5, 2, 8, 3, 9, 1, 3, 7,
        4, 1, 7, 6, 7, 3, 7, 5, 7, 6, 2, 2, 6, 8, 9, 1, 1, 9, 3, 3, 5, 3, 8, 2,
        2, 0, 9, 3, 6, 0, 9, 1, 0, 0, 8, 5, 1, 1, 3, 5, 0, 7, 9, 9, 5, 7, 9, 7,
        6, 3, 9, 8, 1, 1, 0, 8, 0, 0, 2, 0, 0, 6, 4, 4, 9, 0, 8, 4, 3, 6, 8, 0,
        1, 7, 5, 1, 3, 7, 1, 2, 9, 3, 9, 1, 2, 0, 3, 2, 6, 3, 6, 1, 2, 1, 9, 2,
        3, 1, 7, 2, 6, 4, 5, 5, 1, 3, 8, 3, 0, 6, 2, 7, 6, 6, 9, 8, 3, 8, 5, 5,
        6, 9, 0, 5, 3, 2, 1, 6, 3, 6, 0, 6, 3, 6, 1, 2, 3, 5, 2, 9, 1, 3, 2, 8,
        0, 2, 4, 4, 0, 1, 8, 5, 4, 4, 2, 6, 2, 9, 8, 9, 5, 6, 1, 3, 4, 0, 3, 3,
        0, 2, 0, 3, 0, 8, 6, 4, 7, 9, 1, 1, 7, 8, 6, 5], device='cuda:0')
Softmax(
  dim=tensor([[-1.0027,  0.4796,  0.2547,  ...,  0.4936,  0.4757,  0.3081],
          [-1.2534,  0.7038,  0.9116,  

Softmax(
  dim=tensor([[-0.2717,  0.6778,  1.0017,  ...,  0.0075,  0.7912,  0.4029],
          [-1.2056,  0.4231,  0.6860,  ...,  0.4002,  1.2093,  0.3916],
          [-0.9191,  0.6569, -0.2015,  ..., -0.1717,  0.4217, -0.3322],
          ...,
          [-0.6666,  0.2774,  0.6470,  ..., -0.0352,  0.4634,  0.4499],
          [-0.6529,  0.5717,  0.2815,  ..., -0.1952,  0.7153,  0.0616],
          [-1.0896,  0.5206,  0.6041,  ...,  0.1558,  1.1752,  0.7149]],
         device='cuda:0', grad_fn=<AddmmBackward>)
) tensor([1, 3, 3, 8, 7, 6, 1, 7, 8, 1, 1, 1, 2, 6, 6, 8, 3, 6, 1, 1, 6, 6, 7, 3,
        9, 8, 2, 9, 7, 7, 2, 5, 1, 3, 5, 4, 6, 9, 9, 2, 5, 8, 0, 0, 0, 8, 7, 1,
        8, 9, 6, 4, 0, 9, 9, 6, 1, 4, 1, 0, 3, 3, 1, 8, 4, 4, 5, 6, 2, 3, 4, 7,
        5, 3, 5, 0, 1, 9, 0, 2, 8, 7, 3, 0, 2, 4, 0, 6, 8, 9, 9, 8, 5, 5, 2, 5,
        5, 4, 9, 5, 1, 7, 8, 3, 1, 2, 3, 2, 5, 6, 7, 7, 4, 5, 8, 0, 1, 6, 3, 9,
        2, 2, 7, 3, 2, 8, 1, 9, 6, 6, 9, 0, 5, 7, 3, 0, 1, 4, 0, 7, 6, 1, 5, 4,
      

Softmax(
  dim=tensor([[-0.8074,  1.3949,  1.5651,  ...,  0.6779,  1.3563,  0.5372],
          [-0.4205,  0.1927,  0.7435,  ..., -0.0653,  1.0602, -0.3913],
          [-0.5571,  0.5033,  0.4991,  ...,  0.0106,  0.8045,  0.3071],
          ...,
          [-0.9551,  1.6439,  1.4682,  ...,  0.7028,  1.7628,  0.6797],
          [-1.2389,  0.4913,  1.0118,  ..., -0.2985,  0.5181,  0.2156],
          [-0.2059,  0.5791,  0.8969,  ...,  0.2101,  0.6144,  0.2893]],
         device='cuda:0', grad_fn=<AddmmBackward>)
) tensor([8, 5, 8, 3, 3, 7, 1, 7, 2, 1, 4, 8, 0, 1, 5, 7, 6, 3, 4, 6, 2, 7, 9, 8,
        9, 2, 2, 1, 3, 3, 7, 6, 2, 7, 2, 2, 8, 6, 8, 9, 1, 7, 3, 0, 2, 0, 3, 1,
        4, 9, 1, 0, 3, 2, 2, 6, 1, 2, 8, 0, 9, 8, 2, 9, 1, 6, 9, 7, 7, 5, 5, 4,
        1, 3, 1, 9, 9, 7, 6, 7, 5, 3, 1, 3, 6, 4, 0, 2, 2, 5, 4, 3, 2, 9, 2, 9,
        0, 9, 7, 7, 9, 6, 4, 7, 6, 8, 4, 8, 6, 0, 0, 8, 7, 0, 0, 4, 8, 6, 4, 2,
        9, 8, 6, 3, 7, 6, 4, 2, 1, 7, 9, 5, 3, 1, 1, 1, 7, 2, 5, 4, 6, 1, 7, 2,
      

Softmax(
  dim=tensor([[-0.5260,  0.5984,  0.4748,  ...,  0.2079,  0.8289, -0.0132],
          [-0.7364,  0.3498,  0.6956,  ..., -0.0608,  0.7143, -0.2148],
          [-0.7326,  0.6165,  0.2095,  ...,  0.4211,  0.6925,  0.2833],
          ...,
          [-0.2671,  0.5720,  0.8993,  ...,  0.1324,  0.6190,  0.3555],
          [-0.6052,  0.5181,  0.8809,  ..., -0.1023,  0.7460,  0.3742],
          [-0.6364,  0.4967,  1.0830,  ...,  0.0540,  0.4087,  0.4766]],
         device='cuda:0', grad_fn=<AddmmBackward>)
) tensor([9, 6, 2, 3, 4, 0, 6, 4, 2, 1, 0, 9, 9, 4, 1, 4, 7, 1, 6, 3, 0, 5, 0, 9,
        8, 6, 7, 6, 9, 9, 1, 6, 3, 6, 6, 8, 2, 3, 6, 8, 3, 0, 6, 0, 8, 3, 2, 7,
        0, 4, 6, 8, 3, 3, 4, 5, 3, 3, 3, 3, 9, 7, 0, 6, 2, 2, 0, 3, 4, 5, 3, 6,
        8, 0, 7, 1, 5, 2, 0, 2, 4, 0, 2, 0, 4, 5, 7, 1, 0, 4, 7, 1, 0, 3, 9, 7,
        5, 4, 4, 1, 6, 0, 8, 1, 6, 9, 5, 7, 2, 2, 2, 6, 3, 9, 4, 6, 1, 4, 0, 5,
        2, 0, 3, 9, 7, 2, 3, 0, 7, 3, 2, 8, 1, 3, 6, 8, 8, 0, 1, 6, 5, 0, 7, 6,
      

Softmax(
  dim=tensor([[-0.8249,  0.6190,  0.2746,  ..., -0.0602,  0.6799,  0.3413],
          [-1.1892,  0.4837,  0.0501,  ...,  0.0481,  0.7930,  0.5248],
          [-0.4823,  0.7779,  0.7496,  ..., -0.3040,  1.0380,  0.3640],
          ...,
          [-0.3983,  0.4736, -0.0245,  ..., -0.1523,  0.4146,  0.2011],
          [-0.0801,  0.1953, -0.0339,  ..., -0.0179,  0.2805, -0.1331],
          [-0.6328,  0.4273,  0.1940,  ...,  0.2445,  0.5524,  0.3870]],
         device='cuda:0', grad_fn=<AddmmBackward>)
) tensor([0, 3, 9, 8, 1, 3, 1, 2, 8, 9, 0, 8, 4, 1, 2, 5, 8, 5, 6, 0, 1, 7, 1, 4,
        5, 7, 8, 9, 2, 7, 6, 5, 5, 5, 8, 4, 2, 3, 9, 8, 1, 8, 7, 4, 1, 7, 1, 9,
        9, 8, 4, 8, 2, 4, 7, 6, 6, 3, 2, 1, 7, 1, 7, 8, 8, 5, 0, 5, 1, 3, 3, 1,
        4, 8, 6, 1, 9, 7, 3, 0, 1, 7, 8, 7, 0, 1, 4, 1, 4, 8, 4, 1, 1, 8, 0, 4,
        0, 0, 0, 1, 4, 9, 3, 0, 8, 4, 7, 2, 4, 4, 5, 2, 3, 7, 9, 6, 9, 2, 5, 3,
        5, 0, 2, 0, 0, 2, 9, 3, 5, 3, 4, 4, 5, 4, 4, 0, 3, 4, 3, 8, 3, 2, 2, 9,
      

Softmax(
  dim=tensor([[-0.9822,  0.7520,  1.1349,  ...,  0.6550,  0.9011,  0.0928],
          [-0.9134,  0.7063,  0.5425,  ..., -0.0621,  0.7056, -0.3828],
          [-0.7658,  0.9392,  1.0262,  ...,  0.3614,  0.9637, -0.0538],
          ...,
          [-0.4725,  0.3699,  0.2041,  ..., -0.0184,  1.0198, -0.2464],
          [-1.0060,  0.8762,  0.4408,  ..., -0.3159,  0.8407, -0.3449],
          [ 0.0758,  0.2561,  0.3899,  ...,  0.0620,  0.1992,  0.2553]],
         device='cuda:0', grad_fn=<AddmmBackward>)
) tensor([9, 3, 6, 9, 1, 1, 4, 2, 1, 5, 2, 7, 9, 8, 8, 5, 0, 0, 7, 2, 2, 0, 7, 3,
        0, 1, 8, 0, 4, 6, 2, 9, 0, 1, 6, 1, 5, 4, 4, 0, 9, 5, 7, 7, 3, 9, 7, 2,
        1, 9, 3, 6, 9, 1, 9, 4, 7, 1, 2, 5, 0, 6, 9, 6, 2, 7, 3, 3, 7, 5, 0, 4,
        0, 5, 7, 3, 2, 3, 2, 8, 3, 8, 7, 4, 5, 6, 8, 8, 0, 2, 9, 5, 4, 9, 0, 3,
        8, 2, 4, 8, 5, 8, 9, 3, 2, 8, 4, 1, 4, 7, 8, 3, 0, 0, 7, 6, 1, 8, 2, 2,
        5, 4, 4, 2, 3, 9, 1, 6, 8, 0, 8, 5, 1, 9, 8, 1, 8, 4, 6, 6, 6, 0, 0, 1,
      

) tensor([7, 1, 6, 0, 2, 5, 1, 3, 7, 9, 4, 6, 6, 7, 7, 7, 9, 8, 7, 9, 7, 2, 6, 8,
        8, 0, 8, 9, 0, 9, 2, 4, 2, 4, 7, 1, 6, 3, 2, 2, 7, 1, 3, 7, 5, 7, 8, 9,
        3, 9, 0, 0, 1, 6, 1, 3, 5, 8, 1, 2, 8, 8, 4, 7, 1, 3, 5, 1, 6, 5, 8, 9,
        7, 6, 4, 4, 8, 1, 3, 6, 9, 5, 1, 6, 4, 8, 1, 3, 8, 1, 7, 8, 6, 9, 4, 5,
        6, 7, 6, 1, 1, 3, 2, 8, 7, 7, 8, 9, 3, 2, 2, 5, 7, 1, 0, 9, 3, 4, 7, 7,
        9, 3, 2, 3, 9, 0, 4, 9, 6, 5, 6, 7, 0, 7, 1, 8, 4, 5, 4, 9, 8, 6, 1, 4,
        7, 6, 4, 5, 0, 2, 4, 8, 8, 1, 1, 7, 4, 4, 2, 0, 9, 1, 0, 2, 7, 8, 8, 2,
        5, 1, 3, 2, 2, 1, 7, 9, 6, 0, 0, 3, 0, 1, 4, 1, 1, 5, 0, 9, 2, 5, 1, 9,
        1, 7, 5, 2, 9, 0, 8, 9, 9, 1, 5, 8, 5, 8, 6, 1, 3, 2, 5, 1, 6, 3, 9, 1,
        4, 2, 5, 5, 1, 2, 6, 5, 7, 8, 2, 3, 1, 4, 4, 5, 3, 2, 5, 3, 4, 4, 7, 7,
        6, 6, 3, 2, 1, 7, 2, 5, 5, 9, 5, 5, 0, 0, 5, 7], device='cuda:0')
Softmax(
  dim=tensor([[-0.8154,  0.8069,  0.2735,  ...,  0.0772,  0.9192,  0.0309],
          [-0.5430,  1.2867,  0.5069,  

) tensor([2, 6, 0, 9, 3, 5, 3, 9, 3, 3, 3, 3, 6, 8, 7, 2, 2, 4, 7, 9, 5, 7, 7, 3,
        1, 4, 0, 4, 4, 8, 5, 0, 1, 1, 5, 5, 4, 0, 8, 3, 1, 8, 0, 5, 2, 5, 7, 6,
        4, 6, 8, 9, 1, 9, 7, 7, 0, 6, 3, 1, 9, 1, 0, 1, 1, 0, 1, 3, 5, 2, 8, 6,
        7, 5, 1, 0, 9, 7, 0, 5, 6, 0, 6, 1, 8, 7, 8, 2, 8, 0, 5, 4, 2, 2, 4, 6,
        1, 6, 4, 7, 2, 1, 3, 8, 8, 4, 5, 6, 6, 3, 4, 6, 4, 9, 3, 3, 5, 8, 2, 4,
        8, 2, 3, 7, 4, 2, 4, 8, 0, 4, 5, 5, 7, 3, 9, 6, 1, 8, 6, 8, 1, 3, 4, 1,
        9, 4, 3, 9, 9, 1, 5, 0, 7, 9, 6, 4, 5, 6, 2, 1, 9, 7, 1, 1, 5, 6, 9, 5,
        5, 8, 8, 0, 2, 7, 5, 7, 9, 8, 2, 5, 6, 1, 2, 9, 6, 4, 7, 1, 4, 4, 8, 8,
        1, 6, 2, 8, 5, 6, 7, 0, 9, 5, 7, 4, 1, 1, 0, 7, 2, 5, 1, 7, 9, 1, 0, 6,
        7, 5, 3, 4, 0, 6, 2, 9, 3, 9, 4, 3, 7, 6, 7, 5, 4, 4, 4, 9, 3, 7, 4, 8,
        1, 2, 3, 2, 0, 3, 4, 6, 0, 1, 8, 7, 6, 2, 1, 6], device='cuda:0')
Softmax(
  dim=tensor([[-7.1586e-01,  8.4571e-01,  4.8343e-01,  ...,  3.1615e-01,
            4.0301e-01,  5.2275e-02],
    

Softmax(
  dim=tensor([[-0.3986,  0.6023,  0.5681,  ...,  0.3093,  0.3907,  0.5316],
          [-0.6451,  1.0398,  1.1338,  ..., -0.1403,  0.8885,  0.2762],
          [-0.6313,  0.4813,  0.7710,  ...,  0.1452,  1.0405,  0.2190],
          ...,
          [-0.3620, -0.2299,  0.1910,  ..., -0.2300,  0.5578,  0.0584],
          [-0.5426,  1.0010,  0.7530,  ...,  0.2257,  0.3001,  0.5143],
          [-0.3939,  0.4724,  0.8392,  ...,  0.4302,  0.3165,  0.2108]],
         device='cuda:0', grad_fn=<AddmmBackward>)
) tensor([2, 7, 7, 0, 9, 2, 4, 5, 7, 3, 8, 4, 7, 7, 5, 4, 6, 6, 4, 3, 8, 8, 5, 7,
        0, 1, 9, 3, 9, 2, 8, 3, 8, 0, 2, 4, 5, 0, 4, 8, 1, 0, 5, 1, 2, 4, 6, 1,
        8, 7, 9, 1, 3, 1, 5, 3, 6, 8, 9, 4, 6, 5, 5, 6, 3, 8, 7, 6, 9, 8, 9, 0,
        3, 8, 2, 2, 3, 7, 5, 8, 4, 4, 8, 7, 8, 7, 2, 7, 9, 1, 7, 5, 0, 3, 8, 7,
        7, 5, 5, 5, 0, 1, 8, 1, 7, 8, 8, 3, 9, 1, 3, 6, 1, 7, 6, 3, 1, 2, 4, 2,
        1, 4, 0, 3, 2, 4, 7, 3, 7, 0, 4, 8, 3, 6, 7, 2, 4, 6, 5, 2, 3, 3, 1, 1,
      

) tensor([3, 6, 0, 5, 4, 5, 3, 2, 3, 9, 1, 4, 6, 7, 4, 8, 8, 0, 2, 8, 8, 1, 7, 6,
        2, 3, 7, 8, 3, 2, 0, 1, 8, 5, 0, 7, 3, 7, 5, 1, 5, 7, 6, 7, 9, 0, 4, 5,
        5, 1, 1, 6, 1, 9, 9, 6, 2, 2, 6, 4, 9, 6, 6, 1, 9, 3, 5, 6, 8, 2, 5, 4,
        3, 0, 7, 6, 7, 2, 0, 8, 6, 9, 2, 2, 3, 6, 3, 2, 8, 7, 4, 7, 8, 9, 6, 5,
        1, 1, 6, 8, 7, 3, 2, 3, 7, 3, 2, 4, 9, 4, 3, 1, 3, 1, 5, 8, 1, 8, 5, 3,
        0, 5, 0, 0, 5, 7, 9, 8, 2, 5, 6, 0, 6, 4, 9, 3, 2, 0, 6, 1, 8, 8, 0, 7,
        1, 7, 8, 9, 9, 9, 8, 9, 3, 4, 7, 7, 7, 5, 6, 0, 4, 5, 2, 4, 5, 1, 8, 3,
        6, 1, 0, 3, 0, 0, 1, 3, 9, 9, 5, 7, 7, 0, 1, 7, 3, 0, 2, 0, 7, 3, 7, 2,
        7, 7, 9, 3, 5, 9, 4, 6, 0, 3, 6, 4, 7, 3, 4, 1, 0, 1, 8, 4, 4, 2, 9, 3,
        5, 0, 1, 8, 0, 4, 6, 4, 9, 3, 3, 1, 3, 7, 1, 2, 6, 4, 6, 2, 1, 8, 6, 3,
        2, 5, 4, 5, 5, 3, 2, 2, 4, 9, 4, 7, 7, 6, 9, 4], device='cuda:0')
Softmax(
  dim=tensor([[-2.5401e-01,  5.9880e-01,  3.2424e-01,  ...,  1.6979e-01,
            7.3920e-01, -2.4906e-02],
    

) tensor([1, 6, 7, 0, 3, 9, 0, 2, 3, 4, 8, 1, 3, 0, 1, 8, 2, 9, 8, 6, 4, 7, 0, 1,
        6, 6, 3, 7, 9, 3, 0, 3, 4, 3, 2, 0, 3, 0, 6, 7, 3, 7, 3, 9, 3, 5, 1, 6,
        2, 1, 8, 9, 1, 0, 2, 8, 8, 8, 0, 2, 6, 8, 0, 0, 8, 3, 2, 3, 8, 9, 9, 7,
        0, 1, 8, 5, 3, 9, 2, 9, 3, 2, 2, 2, 7, 8, 8, 0, 4, 8, 5, 5, 2, 7, 8, 3,
        3, 4, 8, 4, 1, 6, 0, 4, 4, 0, 7, 3, 4, 3, 5, 6, 2, 7, 4, 0, 9, 8, 2, 5,
        2, 3, 7, 6, 8, 2, 1, 4, 6, 5, 1, 6, 4, 0, 6, 5, 7, 7, 0, 8, 2, 0, 9, 0,
        9, 8, 2, 3, 5, 1, 0, 3, 4, 0, 5, 1, 2, 3, 7, 1, 6, 2, 5, 3, 1, 5, 5, 8,
        4, 5, 4, 6, 0, 3, 4, 6, 4, 1, 5, 8, 8, 4, 7, 0, 1, 5, 4, 9, 6, 0, 5, 6,
        0, 7, 7, 8, 3, 0, 4, 1, 6, 1, 1, 8, 4, 2, 7, 8, 8, 9, 4, 0, 3, 1, 4, 1,
        0, 9, 7, 1, 1, 9, 2, 8, 4, 8, 3, 6, 4, 8, 6, 1, 4, 9, 9, 8, 3, 8, 2, 6,
        1, 0, 1, 7, 7, 1, 9, 7, 1, 9, 6, 0, 9, 9, 4, 6], device='cuda:0')
Softmax(
  dim=tensor([[-0.1116,  0.0981,  0.7398,  ..., -0.1524,  0.8513,  0.4736],
          [-0.4163,  0.8130,  0.5981,  

) tensor([5, 1, 5, 8, 4, 0, 6, 3, 2, 5, 3, 2, 3, 2, 9, 3, 4, 1, 0, 5, 2, 7, 5, 7,
        6, 4, 2, 6, 7, 3, 5, 7, 4, 4, 7, 5, 7, 9, 8, 7, 5, 6, 1, 7, 8, 5, 7, 0,
        4, 7, 4, 8, 7, 6, 6, 9, 5, 9, 0, 7, 5, 0, 6, 9, 0, 8, 3, 6, 6, 4, 1, 9,
        3, 9, 7, 2, 0, 8, 8, 1, 2, 0, 5, 6, 8, 1, 0, 8, 4, 9, 9, 8, 5, 5, 3, 8,
        5, 2, 1, 9, 2, 8, 1, 6, 1, 9, 5, 3, 1, 3, 0, 5, 2, 4, 1, 0, 4, 6, 2, 3,
        6, 6, 9, 1, 4, 6, 0, 4, 5, 4, 0, 4, 5, 5, 3, 2, 1, 8, 8, 4, 1, 0, 0, 6,
        7, 9, 6, 7, 6, 6, 4, 7, 9, 7, 4, 8, 2, 8, 2, 8, 9, 3, 9, 0, 6, 4, 8, 4,
        0, 8, 7, 9, 2, 1, 1, 0, 1, 7, 8, 1, 5, 0, 9, 6, 4, 9, 2, 6, 4, 1, 7, 2,
        6, 1, 0, 5, 4, 5, 5, 3, 9, 0, 4, 8, 7, 7, 8, 3, 5, 7, 7, 5, 7, 2, 1, 9,
        6, 0, 2, 6, 0, 8, 8, 5, 5, 8, 4, 5, 4, 9, 2, 4, 7, 4, 6, 8, 3, 2, 5, 5,
        9, 3, 6, 7, 4, 6, 6, 2, 4, 1, 2, 8, 0, 5, 7, 9], device='cuda:0')
Softmax(
  dim=tensor([[-0.7023,  0.7945,  0.2325,  ...,  0.2246,  0.4591,  0.2090],
          [-1.2541,  0.2700,  0.5416,  

Softmax(
  dim=tensor([[-0.6938,  0.8342,  0.5641,  ...,  0.0840,  0.3323, -0.1637],
          [-0.5071,  0.1279,  0.6403,  ..., -0.1276,  0.5739, -0.1139],
          [-1.0314,  1.2241,  0.4035,  ...,  0.3091,  1.1626,  0.4002],
          ...,
          [-0.4186,  0.9051,  1.0647,  ...,  0.0376,  1.1916,  0.3733],
          [-1.2950,  0.7553, -0.1216,  ...,  0.0459,  0.5346,  0.2259],
          [-0.7178,  0.7286,  1.5161,  ...,  0.4295,  0.5966,  0.3931]],
         device='cuda:0', grad_fn=<AddmmBackward>)
) tensor([7, 6, 2, 5, 3, 2, 4, 4, 6, 7, 4, 2, 1, 0, 2, 5, 5, 2, 0, 1, 3, 1, 2, 0,
        5, 3, 7, 7, 0, 1, 9, 5, 0, 6, 7, 8, 8, 9, 2, 3, 7, 7, 1, 7, 1, 2, 2, 9,
        1, 5, 2, 1, 1, 8, 3, 6, 0, 4, 9, 0, 9, 7, 9, 7, 3, 1, 3, 0, 1, 7, 4, 3,
        9, 9, 6, 8, 3, 3, 3, 8, 4, 1, 9, 3, 1, 8, 6, 7, 1, 4, 4, 6, 4, 1, 7, 4,
        6, 2, 6, 5, 3, 7, 8, 1, 5, 4, 6, 8, 1, 3, 6, 7, 9, 0, 6, 1, 1, 3, 8, 3,
        3, 8, 1, 5, 0, 2, 8, 1, 9, 3, 3, 8, 9, 4, 8, 2, 7, 9, 0, 9, 2, 8, 0, 8,
      

Softmax(
  dim=tensor([[-6.7723e-01,  8.7163e-01,  1.1489e+00,  ..., -1.2029e-01,
            3.8999e-01,  8.4272e-01],
          [-9.4323e-01,  5.7551e-01, -6.6345e-02,  ...,  1.3814e-02,
            7.4579e-01,  4.0356e-01],
          [-1.0917e+00,  1.1746e+00,  8.7459e-01,  ..., -1.1341e-01,
            6.4630e-01,  1.5825e-01],
          ...,
          [-7.8555e-01,  1.1320e+00,  7.4195e-01,  ...,  2.4186e-01,
            5.9156e-01,  3.0178e-01],
          [-4.4477e-01,  8.7536e-01,  1.4710e+00,  ..., -4.9809e-02,
            1.0356e+00, -7.1919e-04],
          [-3.2132e-01,  1.0230e+00,  6.3812e-01,  ...,  1.4386e-01,
            6.1442e-01,  1.6986e-01]], device='cuda:0', grad_fn=<AddmmBackward>)
) tensor([8, 5, 8, 6, 8, 6, 0, 3, 9, 0, 5, 1, 7, 8, 4, 0, 1, 1, 9, 3, 1, 2, 1, 5,
        6, 1, 8, 0, 0, 3, 5, 9, 8, 0, 9, 0, 3, 2, 0, 1, 3, 9, 9, 3, 3, 4, 3, 0,
        8, 5, 4, 5, 3, 6, 9, 2, 2, 2, 1, 9, 6, 4, 9, 5, 6, 5, 6, 9, 8, 2, 4, 9,
        2, 0, 6, 7, 2, 2, 8, 3, 1, 2, 3, 9, 1

Softmax(
  dim=tensor([[-0.9572,  0.5645,  0.6898,  ..., -0.0388,  1.0414, -0.1012],
          [-0.5744,  0.8242,  0.8727,  ...,  0.4371,  0.4803,  0.1343],
          [-0.4690,  0.5158,  0.6191,  ...,  0.0824,  0.4839,  0.1113],
          ...,
          [-0.5327,  0.4883,  0.8891,  ...,  0.2892,  0.8166, -0.1901],
          [-0.3624,  1.0036,  0.3324,  ...,  0.2310,  0.4252, -0.0685],
          [-0.8621,  1.1058,  0.3771,  ...,  0.1403,  1.2338, -0.2441]],
         device='cuda:0', grad_fn=<AddmmBackward>)
) tensor([8, 7, 2, 1, 5, 8, 1, 1, 2, 0, 7, 6, 7, 5, 5, 7, 0, 8, 7, 1, 3, 2, 0, 7,
        0, 3, 8, 7, 8, 3, 8, 8, 9, 2, 2, 2, 5, 3, 2, 4, 7, 6, 7, 9, 6, 7, 6, 7,
        0, 4, 4, 4, 8, 1, 1, 7, 5, 7, 2, 7, 9, 4, 0, 3, 3, 7, 8, 4, 9, 1, 4, 9,
        1, 8, 2, 8, 7, 3, 6, 0, 9, 6, 6, 0, 7, 3, 5, 9, 0, 6, 9, 5, 3, 6, 3, 9,
        2, 6, 1, 0, 7, 7, 4, 2, 4, 3, 1, 9, 4, 9, 5, 2, 7, 6, 7, 6, 7, 0, 7, 8,
        1, 9, 3, 0, 6, 4, 1, 6, 7, 8, 7, 4, 5, 9, 2, 6, 7, 6, 8, 0, 8, 5, 0, 3,
      

Softmax(
  dim=tensor([[-5.1410e-01,  6.8177e-01,  8.5244e-01,  ..., -1.3031e-01,
            6.2278e-01,  8.8491e-02],
          [-4.4412e-01,  8.6202e-01,  7.6443e-01,  ...,  2.7959e-01,
            1.9496e-01,  1.7367e-01],
          [-7.8899e-01,  5.9139e-01,  4.3470e-01,  ...,  2.4021e-01,
            4.1667e-01,  7.1567e-01],
          ...,
          [-6.3954e-01,  5.6867e-01,  3.5228e-01,  ..., -1.8041e-04,
            5.3155e-01, -1.3996e-01],
          [-6.5856e-01,  1.3819e+00,  1.0683e+00,  ...,  6.5568e-02,
            2.1359e-01,  1.4412e-01],
          [-9.4113e-01,  9.9940e-01,  7.2637e-01,  ..., -6.2760e-03,
            7.1874e-01,  2.8094e-01]], device='cuda:0', grad_fn=<AddmmBackward>)
) tensor([7, 9, 7, 8, 5, 7, 5, 8, 2, 6, 9, 3, 0, 8, 1, 6, 9, 9, 3, 2, 6, 6, 5, 1,
        2, 4, 1, 3, 0, 5, 8, 7, 3, 6, 8, 2, 0, 7, 5, 1, 4, 7, 3, 0, 5, 2, 9, 8,
        7, 1, 9, 8, 5, 4, 0, 0, 4, 2, 2, 7, 1, 0, 4, 8, 6, 4, 4, 1, 3, 5, 8, 9,
        7, 7, 6, 5, 7, 6, 8, 4, 2, 2, 3, 5, 7

Softmax(
  dim=tensor([[-0.8891,  0.9100,  0.5014,  ..., -0.2032,  1.0252, -0.0486],
          [-0.6777,  0.5967,  1.2926,  ...,  0.6739,  0.3368,  0.5751],
          [-0.8168,  0.8449,  1.0644,  ...,  0.0175,  0.3948, -0.0268],
          ...,
          [-0.8671,  0.9294,  0.4233,  ..., -0.1295,  0.0205, -0.0218],
          [-1.5719,  0.5667,  0.0086,  ..., -0.2458,  1.0866,  0.6633],
          [-0.6887,  1.2711,  0.8727,  ...,  0.0368,  0.4125,  0.4006]],
         device='cuda:0', grad_fn=<AddmmBackward>)
) tensor([3, 7, 8, 9, 5, 4, 5, 1, 1, 9, 9, 9, 4, 3, 7, 3, 1, 6, 4, 8, 3, 9, 6, 3,
        0, 3, 3, 7, 4, 8, 9, 6, 7, 3, 3, 4, 1, 3, 5, 9, 9, 6, 0, 6, 7, 0, 0, 6,
        7, 2, 1, 2, 7, 7, 1, 7, 1, 9, 4, 2, 9, 6, 6, 3, 5, 1, 7, 1, 3, 5, 4, 9,
        4, 5, 3, 0, 9, 4, 4, 4, 8, 1, 8, 8, 7, 9, 3, 2, 3, 1, 8, 6, 1, 1, 2, 3,
        3, 6, 2, 0, 9, 5, 8, 3, 1, 2, 1, 3, 5, 1, 9, 5, 2, 5, 7, 9, 9, 6, 3, 1,
        1, 4, 1, 3, 2, 1, 0, 6, 9, 8, 3, 2, 6, 1, 7, 3, 2, 1, 4, 8, 8, 1, 4, 8,
      

Softmax(
  dim=tensor([[-0.4710,  0.5275,  0.9901,  ..., -0.0943,  0.5084,  0.1498],
          [-1.0425,  0.3420,  0.0419,  ..., -0.1566,  1.0094, -0.0328],
          [-0.8632,  0.9286,  0.6628,  ..., -0.1514,  1.0122,  0.0165],
          ...,
          [-0.7987,  0.5788,  1.0334,  ..., -0.0248,  0.1089, -0.0739],
          [-0.7376,  0.3436,  0.9307,  ...,  0.1404,  1.1459,  0.6640],
          [-0.4815,  0.4808,  0.6258,  ...,  0.0946,  0.6533,  0.0244]],
         device='cuda:0', grad_fn=<AddmmBackward>)
) tensor([5, 0, 9, 7, 0, 3, 6, 3, 7, 0, 4, 4, 3, 3, 1, 1, 7, 6, 7, 6, 4, 4, 1, 7,
        9, 0, 6, 1, 4, 7, 6, 1, 4, 2, 7, 4, 9, 3, 1, 1, 1, 9, 1, 5, 4, 2, 0, 6,
        9, 7, 3, 5, 0, 7, 9, 4, 5, 1, 5, 7, 7, 2, 7, 1, 5, 8, 9, 5, 2, 9, 5, 7,
        3, 2, 1, 5, 5, 7, 6, 0, 6, 3, 3, 1, 2, 5, 2, 5, 9, 0, 1, 8, 1, 1, 2, 0,
        8, 3, 7, 3, 2, 7, 6, 9, 7, 6, 2, 8, 3, 6, 6, 2, 6, 4, 1, 4, 1, 3, 0, 9,
        5, 7, 5, 1, 2, 8, 3, 0, 2, 8, 9, 7, 6, 1, 7, 7, 7, 2, 8, 4, 5, 8, 2, 2,
      

Softmax(
  dim=tensor([[-0.5827,  0.6600,  0.9543,  ...,  0.1589,  0.6378,  0.3976],
          [-0.1813,  0.3794,  0.5019,  ...,  0.6261,  1.1172, -0.0594],
          [-0.7525,  1.2949,  0.7246,  ...,  0.2867,  1.0772, -0.0311],
          ...,
          [-0.5870,  0.7429,  0.7739,  ..., -0.5444,  0.7019,  0.1045],
          [-0.8561,  0.2883,  0.5736,  ..., -0.2456,  1.1221,  0.4946],
          [-0.8708,  0.9504,  0.2249,  ...,  0.4517,  1.9271,  0.0710]],
         device='cuda:0', grad_fn=<AddmmBackward>)
) tensor([1, 2, 6, 0, 0, 7, 2, 6, 2, 2, 5, 2, 1, 4, 8, 4, 3, 5, 6, 8, 6, 1, 8, 1,
        9, 2, 0, 3, 1, 4, 4, 7, 2, 3, 1, 9, 7, 4, 6, 4, 5, 2, 7, 1, 0, 7, 8, 0,
        6, 9, 6, 3, 4, 2, 6, 9, 5, 0, 5, 0, 8, 1, 2, 2, 7, 0, 2, 7, 5, 7, 7, 1,
        3, 2, 4, 9, 6, 8, 4, 6, 9, 6, 6, 0, 8, 0, 4, 4, 3, 5, 7, 1, 2, 3, 4, 7,
        7, 2, 9, 8, 4, 7, 6, 1, 7, 1, 8, 1, 5, 6, 8, 4, 3, 1, 1, 4, 9, 0, 5, 1,
        8, 0, 9, 1, 9, 6, 1, 0, 5, 8, 5, 6, 2, 3, 8, 0, 7, 6, 3, 2, 7, 6, 9, 2,
      

Softmax(
  dim=tensor([[-3.8439e-01,  5.8438e-01,  7.2151e-01,  ...,  5.8741e-02,
            3.9796e-01,  3.4976e-01],
          [-5.4822e-01,  4.8531e-01,  1.0309e-01,  ..., -1.8507e-01,
            3.5523e-01,  6.1354e-02],
          [-5.4092e-01,  5.6616e-01,  3.3872e-01,  ...,  6.6161e-01,
            9.3435e-01,  2.5074e-01],
          ...,
          [-8.6795e-01,  7.4951e-01,  1.1077e+00,  ...,  9.0758e-04,
            2.0523e-01,  3.3940e-01],
          [-3.2305e-01,  5.7539e-01,  9.3814e-01,  ..., -2.4705e-01,
            6.2902e-01, -2.3675e-02],
          [-9.8789e-01,  6.5571e-01, -7.2957e-02,  ...,  3.2444e-01,
            3.1769e-01, -2.2497e-01]], device='cuda:0', grad_fn=<AddmmBackward>)
) tensor([1, 0, 7, 1, 4, 1, 3, 3, 9, 6, 4, 7, 8, 0, 0, 8, 0, 1, 1, 0, 0, 0, 9, 2,
        5, 7, 5, 4, 1, 4, 0, 8, 8, 2, 7, 4, 5, 3, 7, 2, 9, 6, 6, 3, 9, 6, 9, 5,
        1, 5, 3, 5, 8, 9, 0, 7, 6, 6, 6, 3, 9, 4, 9, 2, 2, 7, 4, 6, 9, 8, 2, 3,
        5, 3, 1, 7, 0, 7, 0, 1, 6, 2, 2, 4, 0

) tensor([1, 2, 4, 8, 4, 9, 0, 7, 1, 2, 0, 9, 6, 3, 8, 6, 0, 6, 9, 3, 5, 5, 8, 8,
        2, 2, 3, 5, 9, 6, 5, 9, 1, 6, 2, 2, 1, 3, 4, 7, 4, 9, 4, 3, 4, 4, 4, 3,
        3, 3, 3, 4, 7, 6, 4, 4, 0, 1, 1, 7, 1, 3, 9, 6, 8, 5, 7, 4, 2, 2, 3, 5,
        0, 9, 3, 0, 1, 8, 2, 0, 2, 6, 6, 4, 0, 8, 0, 9, 3, 6, 1, 7, 8, 9, 2, 3,
        4, 0, 0, 9, 3, 7, 4, 0, 7, 7, 2, 0, 6, 4, 6, 6, 0, 2, 8, 6, 3, 9, 0, 9,
        1, 0, 8, 1, 5, 2, 5, 1, 9, 7, 3, 8, 5, 4, 8, 8, 0, 9, 1, 5, 7, 0, 7, 1,
        1, 8, 0, 4, 1, 5, 2, 5, 8, 5, 7, 4, 4, 3, 0, 0, 3, 0, 8, 8, 4, 5, 5, 6,
        0, 1, 1, 2, 2, 5, 0, 1, 6, 9, 5, 7, 4, 1, 4, 0, 3, 2, 4, 6, 3, 8, 4, 1,
        7, 2, 6, 3, 5, 8, 0, 8, 7, 5, 3, 9, 9, 0, 7, 9, 8, 5, 2, 4, 7, 6, 8, 8,
        6, 4, 4, 7, 6, 5, 5, 3, 6, 2, 0, 1, 7, 5, 0, 2, 8, 3, 3, 4, 1, 0, 4, 5,
        4, 1, 0, 0, 1, 0, 6, 1, 4, 2, 8, 0, 4, 2, 8, 3], device='cuda:0')
Softmax(
  dim=tensor([[-0.4947,  0.5576,  0.7582,  ...,  0.4449,  0.7066,  0.1868],
          [-0.4921,  0.1166,  0.2605,  

) tensor([1, 0, 7, 2, 1, 2, 0, 7, 2, 1, 3, 9, 7, 5, 5, 6, 3, 4, 3, 7, 1, 6, 4, 5,
        0, 5, 8, 9, 4, 8, 2, 2, 6, 4, 7, 9, 9, 1, 7, 5, 4, 7, 3, 2, 5, 1, 3, 5,
        8, 6, 1, 9, 1, 9, 1, 4, 3, 7, 2, 9, 6, 3, 2, 8, 2, 8, 8, 3, 4, 1, 0, 2,
        1, 2, 4, 7, 3, 5, 7, 8, 9, 6, 3, 7, 9, 8, 7, 7, 8, 7, 3, 2, 3, 7, 6, 6,
        4, 9, 4, 3, 6, 4, 2, 7, 8, 3, 2, 1, 1, 3, 3, 8, 5, 8, 1, 4, 2, 0, 0, 9,
        6, 0, 3, 9, 8, 0, 7, 4, 9, 1, 9, 8, 6, 0, 5, 9, 6, 4, 6, 8, 4, 0, 7, 5,
        4, 5, 4, 5, 8, 4, 8, 9, 1, 4, 0, 5, 1, 8, 0, 0, 4, 1, 7, 8, 7, 5, 2, 6,
        0, 3, 9, 7, 4, 9, 3, 3, 6, 9, 8, 7, 2, 5, 8, 7, 0, 0, 7, 9, 7, 1, 1, 2,
        0, 7, 0, 2, 0, 1, 8, 2, 7, 3, 3, 5, 7, 8, 1, 1, 9, 4, 3, 2, 6, 2, 7, 1,
        0, 1, 6, 7, 1, 5, 4, 8, 3, 1, 3, 1, 6, 6, 5, 8, 1, 9, 1, 4, 4, 3, 7, 0,
        4, 5, 6, 1, 1, 3, 9, 7, 1, 0, 0, 1, 4, 9, 1, 4], device='cuda:0')
Softmax(
  dim=tensor([[-0.6284,  0.6794,  0.5779,  ..., -0.0710,  1.2402, -0.0895],
          [-0.8192,  0.9425,  0.9017,  

) tensor([0, 1, 7, 1, 2, 8, 8, 0, 9, 9, 1, 7, 0, 4, 4, 4, 6, 9, 5, 3, 7, 5, 7, 8,
        8, 0, 9, 8, 0, 7, 3, 2, 1, 4, 9, 4, 5, 2, 2, 6, 8, 4, 4, 6, 4, 7, 4, 5,
        2, 6, 9, 5, 8, 5, 7, 2, 6, 0, 8, 8, 4, 2, 1, 7, 7, 5, 4, 6, 4, 0, 1, 3,
        6, 9, 5, 1, 8, 6, 4, 1, 7, 4, 5, 2, 7, 0, 5, 8, 1, 0, 5, 3, 1, 6, 6, 0,
        6, 8, 5, 1, 9, 8, 1, 6, 3, 9, 1, 1, 7, 1, 1, 4, 3, 8, 6, 5, 6, 1, 3, 5,
        7, 5, 5, 2, 4, 5, 0, 5, 7, 8, 7, 9, 7, 2, 3, 0, 4, 6, 3, 7, 5, 8, 7, 6,
        9, 6, 9, 6, 3, 6, 3, 9, 5, 8, 0, 2, 2, 9, 3, 5, 2, 0, 9, 6, 6, 1, 2, 2,
        1, 7, 3, 3, 4, 0, 2, 9, 7, 5, 7, 6, 6, 7, 6, 7, 5, 4, 9, 7, 9, 2, 5, 0,
        8, 2, 2, 1, 9, 3, 4, 9, 1, 6, 8, 7, 8, 4, 8, 5, 7, 6, 5, 5, 1, 7, 1, 1,
        0, 3, 9, 1, 2, 5, 3, 6, 4, 0, 7, 6, 6, 9, 8, 8, 5, 0, 0, 8, 9, 1, 0, 1,
        0, 6, 3, 8, 6, 9, 8, 9, 4, 1, 2, 1, 8, 2, 4, 7], device='cuda:0')
Softmax(
  dim=tensor([[-0.7625,  0.3327,  0.1062,  ...,  0.2148,  0.4599, -0.1259],
          [-0.5141,  0.5786, -0.0123,  

) tensor([8, 4, 1, 3, 3, 5, 9, 0, 9, 8, 4, 3, 3, 6, 4, 6, 3, 7, 0, 7, 1, 7, 1, 5,
        6, 8, 4, 6, 4, 9, 3, 6, 5, 7, 1, 4, 1, 1, 9, 4, 4, 7, 4, 5, 7, 9, 5, 7,
        4, 4, 6, 7, 6, 8, 3, 0, 6, 8, 7, 5, 8, 6, 3, 5, 4, 2, 9, 6, 6, 6, 8, 5,
        6, 8, 7, 2, 2, 2, 9, 4, 1, 4, 9, 6, 7, 4, 4, 7, 1, 5, 4, 7, 4, 2, 1, 0,
        7, 4, 1, 7, 5, 2, 7, 5, 2, 4, 8, 7, 8, 5, 0, 8, 6, 7, 8, 9, 8, 1, 8, 1,
        4, 2, 2, 2, 7, 2, 7, 9, 3, 3, 1, 2, 6, 4, 9, 6, 8, 8, 8, 1, 9, 7, 5, 0,
        7, 3, 3, 5, 7, 2, 8, 8, 1, 1, 7, 2, 4, 0, 7, 0, 4, 7, 5, 1, 5, 5, 7, 8,
        4, 7, 9, 9, 5, 9, 6, 2, 3, 6, 8, 9, 7, 4, 2, 4, 2, 3, 2, 2, 5, 1, 2, 0,
        2, 5, 0, 1, 6, 9, 9, 6, 4, 3, 8, 8, 2, 9, 1, 7, 1, 6, 9, 1, 0, 2, 2, 1,
        3, 6, 5, 2, 5, 4, 2, 2, 4, 5, 9, 1, 7, 7, 3, 1, 7, 2, 7, 9, 8, 8, 0, 9,
        4, 5, 3, 0, 1, 2, 0, 8, 4, 6, 2, 3, 3, 7, 7, 4], device='cuda:0')
Softmax(
  dim=tensor([[-0.9887,  1.0526,  0.6536,  ...,  0.4098,  0.6424,  0.1296],
          [-0.9050,  0.6413,  0.7529,  

Softmax(
  dim=tensor([[-7.1088e-01,  9.5920e-01,  1.0567e+00,  4.2680e-01,  4.4589e-01,
            2.2144e-01,  1.2056e-02,  1.8143e-01,  9.0958e-01,  6.0839e-01],
          [-5.3516e-01,  7.6191e-01,  4.9337e-01, -3.9698e-01,  5.7932e-01,
           -1.3407e-02,  6.5970e-02,  2.6507e-01,  9.2952e-01,  5.8507e-02],
          [-8.3201e-01,  9.5836e-01,  1.4295e-01,  7.2273e-01, -5.4530e-01,
            8.8747e-02, -3.8158e-02,  3.2404e-01,  6.5928e-01, -4.1097e-01],
          [-2.8033e-01,  6.2647e-01,  6.0917e-01,  2.3754e-01, -5.3750e-01,
            7.4221e-02, -9.1947e-02,  2.4070e-01,  8.2928e-01,  5.1299e-01],
          [-5.3892e-01,  4.5337e-01,  4.5746e-01,  8.8502e-02,  2.5214e-01,
            4.2607e-02, -4.0129e-01,  5.0772e-02,  6.1074e-01,  1.2619e-01],
          [-3.7830e-01,  6.4723e-01,  4.9888e-01,  4.3075e-01,  1.6821e-01,
            6.1384e-01, -1.9434e-01,  7.7192e-02,  3.0234e-01, -2.7993e-01],
          [-1.4802e+00,  5.3370e-01,  6.0265e-01,  4.9472e-01,  5.199

In [47]:
a = 0x0011

In [45]:
b = 0b0101

In [41]:
print(a)

b'\x00x0011'


In [39]:
a

b'\x00b0011'

In [42]:
c = 'abcd dbad asbdfds dasfb'

In [44]:
c.replace('d','D',100)

'abcD DbaD asbDfDs Dasfb'

In [50]:
d = bin(a+b)

In [53]:
a+eval(d)

39