In [80]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchtext.datasets import Multi30k
from torchtext.data import Field, BucketIterator
import spacy
import numpy as np

import random
import math
import time


In [81]:
seed = 123
def randomSeed(SEED):
    random.seed(SEED)
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)
    torch.backends.cudnn.deterministic = True
randomSeed(seed)

In [82]:
spacy_de = spacy.load("de_core_news_sm")
spacy_en = spacy.load("en_core_web_sm")

In [83]:
def tokenize_de(text):
    return [tok.text for tok in spacy_de.tokenizer(text)]

def tokenize_en(text):
    return [tok.text for tok in spacy_en.tokenizer(text)]

In [84]:
SRC = Field(tokenize=tokenize_de, init_token="<sos>", eos_token = "<eos>", lower=True)

TRG = Field(tokenize=tokenize_en, init_token='<sos>', eos_token="<eos>", lower=True)

In [85]:
train_data, val_data, test_data = Multi30k.splits(exts=('.de', '.en'), fields=(SRC, TRG))

In [86]:
SRC.build_vocab(train_data, min_freq = 2)
TRG.build_vocab(train_data, min_freq = 2)

In [87]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [88]:
train_iter, val_iter, test_iter = BucketIterator.splits((train_data, val_data, test_data), batch_size=128, device=device)

### Building the Model

In [89]:
class Encoder(nn.Module):
    """
    双向GRU,最后返回hidden要拼接最后一层的前向和后向,然后再进行一个全连接+tanh变换函数即可
    """
    def __init__(self, input_dim, emb_dim, enc_hid_dim, dec_hid_dim, dropout):
        super().__init__()
        self.embedding = nn.Embedding(input_dim, emb_dim)
        self.rnn = nn.GRU(emb_dim, enc_hid_dim, bidirectional=True)
        self.fc = nn.Linear(enc_hid_dim * 2, dec_hid_dim)
        self.dropout = nn.Dropout(dropout)
    
    def forward(self, src):
        # src = [src_len, batch_size]
        embedded = self.dropout(self.embedding(src))
        outputs, hidden = self.rnn(embedded)
        # outputs = [seq_len, batch, enc_hid_dim*2]
        hidden = torch.tanh(self.fc(torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim=1)))
        # [batch_size, dec_hid_dim]
        return outputs, hidden

### attention 层的构造

In [90]:
class Attention(nn.Module):
    def __init__(self, enc_hid_dim, dec_hid_dim):
        super().__init__()
        self.atten = nn.Linear((enc_hid_dim * 2) + dec_hid_dim, dec_hid_dim)
        self.v = nn.Linear(dec_hid_dim, 1, bias=False)
        
    def forward(self, hidden, encoder_outputs):
        
        batch_size = encoder_outputs.shape[1]
        src_len = encoder_outputs.shape[0]
        
        hidden = hidden.unsqueeze(1).repeat(1, src_len, 1)
        encoder_outputs = encoder_outputs.permute(1, 0, 2)
        
        energy = torch.tanh(self.atten(torch.cat((hidden, encoder_outputs), dim=2)))
        
        attention = self.v(energy).squeeze(2)
        
        return F.softmax(attention, dim=1)

### decoder

In [91]:
class Decoder(nn.Module):
    def __init__(self, output_dim, emb_dim, enc_hid_dim, dec_hid_dim, dropout, attention):
        super().__init__()
        self.output_dim = output_dim
        self.attention = attention
        self.embedding = nn.Embedding(output_dim, emb_dim)
        
        self.rnn = nn.GRU((enc_hid_dim * 2) + emb_dim, dec_hid_dim)
        
        self.fc = nn.Linear((enc_hid_dim * 2) + dec_hid_dim + emb_dim, output_dim)
        
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, input, hidden, encoder_outputs):
        input = input.unsqueeze(0)
        # [1, batch_size]
        embedded = self.dropout(self.embedding(input))
        # [1, batch_size, emb_dim]
        a = self.attention(hidden, encoder_outputs)
        # [batch_size, 1, seq_len]
        a = a.unsqueeze(1)
        
        encoder_outputs = encoder_outputs.permute(1, 0, 2) # [batch, seq_len, hid*2]
        weighted = torch.bmm(a, encoder_outputs)
        weighted = weighted.permute(1, 0, 2)
        # [1, batch, enc_hid*2]
        rnn_input = torch.cat((embedded, weighted), dim=2)
        # hidden = [batch, dec_hid_dim] -> [n_layers * n_direction, batch, dec_hid_dim]
        output, hidden = self.rnn(rnn_input, hidden.unsqueeze(0))
        
        embedded = embedded.squeeze(0)
        output = output.squeeze(0)
        weighted = weighted.squeeze(0)
        
        prediction = self.fc(torch.cat((output, weighted, embedded), dim=1))
        return prediction, hidden.squeeze(0)
        

In [102]:
class seq2seq(nn.Module):
    def __init__(self, encoder, decoder, device):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device
    
    def forward(self, src, tag, teacher_forcing_ration=0.5):
        # src [src_len, batch]
        # tag [tag_len, batch]
        batch_size = src.shape[1]
        tag_len = tag.shape[0]
        tag_vocab_size = self.decoder.output_dim
        outputs = torch.zeros(tag_len, batch_size, tag_vocab_size).to(device)
        
        encoder_outputs, hidden = self.encoder(src)
        input = tag[0, :]
        
        for t in range(1, tag_len):
            output, hidden = self.decoder(input, hidden, encoder_outputs)
            outputs[t] = output
            
            teacher_force = random.random() < teacher_forcing_ration
            
            top1 = output.argmax(1)
            print('the top1 is: ', top1)
            input =  tag[t] if teacher_force else top1
        return outputs
            

In [103]:
INPUT_DIM = len(SRC.vocab)
OUTPUT_DIM = len(TRG.vocab)
ENC_EMB_DIM = 256
DEC_EMB_DIM = 256
ENC_HID_DIM = 512
DEC_HID_DIM = 512
ENC_DROPOUT = 0.5
DEC_DROPOUT = 0.5

attn = Attention(ENC_HID_DIM, DEC_HID_DIM)
enc = Encoder(INPUT_DIM, ENC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, ENC_DROPOUT)
dec = Decoder(OUTPUT_DIM, DEC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, DEC_DROPOUT, attn)

model = seq2seq(enc, dec, device).to(device)

In [104]:
def init_weights(m):
    for name, param in m.named_parameters():
        if 'weight' in name:
            nn.init.normal_(param.data, mean=0, std=0.01)
        else:
            nn.init.constant_(param.data, 0)
            
model.apply(init_weights)

seq2seq(
  (encoder): Encoder(
    (embedding): Embedding(7854, 256)
    (rnn): GRU(256, 512, bidirectional=True)
    (fc): Linear(in_features=1024, out_features=512, bias=True)
    (dropout): Dropout(p=0.5, inplace=False)
  )
  (decoder): Decoder(
    (attention): Attention(
      (atten): Linear(in_features=1536, out_features=512, bias=True)
      (v): Linear(in_features=512, out_features=1, bias=False)
    )
    (embedding): Embedding(5893, 256)
    (rnn): GRU(1280, 512)
    (fc): Linear(in_features=1792, out_features=5893, bias=True)
    (dropout): Dropout(p=0.5, inplace=False)
  )
)

In [105]:
optimizer = optim.Adam(model.parameters())

In [106]:
TRG_PAD_IDX = TRG.vocab.stoi[TRG.pad_token]

criterion = nn.CrossEntropyLoss(ignore_index = TRG_PAD_IDX)

In [107]:
def train(model, iterator, optimizer, criterion, clip):
    
    model.train()
    
    epoch_loss = 0
    
    for i, batch in enumerate(iterator):
        
        src = batch.src
        trg = batch.trg
        
        optimizer.zero_grad()
        
        output = model(src, trg)
        
        #trg = [trg len, batch size]
        #output = [trg len, batch size, output dim]
        
        output_dim = output.shape[-1]
        
        output = output[1:].view(-1, output_dim)
        trg = trg[1:].view(-1)
        
        #trg = [(trg len - 1) * batch size]
        #output = [(trg len - 1) * batch size, output dim]
        
        loss = criterion(output, trg)
        
        loss.backward()
        
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        
        optimizer.step()
        
        epoch_loss += loss.item()
        
    return epoch_loss / len(iterator)

In [108]:
def evaluate(model, iterator, criterion):
    
    model.eval()
    
    epoch_loss = 0
    
    with torch.no_grad():
    
        for i, batch in enumerate(iterator):

            src = batch.src
            trg = batch.trg

            output = model(src, trg, 0) #turn off teacher forcing

            #trg = [trg len, batch size]
            #output = [trg len, batch size, output dim]

            output_dim = output.shape[-1]
            
            output = output[1:].view(-1, output_dim)
            trg = trg[1:].view(-1)

            #trg = [(trg len - 1) * batch size]
            #output = [(trg len - 1) * batch size, output dim]

            loss = criterion(output, trg)

            epoch_loss += loss.item()
        
    return epoch_loss / len(iterator)

In [109]:
N_EPOCHS = 1
CLIP = 1

best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):
    
    start_time = time.time()
    
    train_loss = train(model, train_iter, optimizer, criterion, CLIP)
    valid_loss = evaluate(model, val_iter, criterion)
    
    end_time = time.time()
    
#     epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut3-model.pt')
    
    print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f}')

the top1 is:  tensor([2101, 4790, 4790, 5224, 3342, 2032,  427, 3680, 3054, 4630, 2837, 4790,
        4207, 3804,  424, 5239, 1664, 2158, 5636, 2154,  790, 5239, 4526, 1664,
        1415,  424, 2194, 3693, 1776, 2194, 4889, 1393, 4938, 2733, 3631, 1728,
        2143, 5401, 3427, 4790, 2143, 4526, 3543, 1526, 2222, 5806, 3631,  683,
        1643, 2101, 1393, 2940,  812, 2143, 1962, 4630, 2143, 5869, 1173, 5841,
        2750,  807,  683, 3501, 2154,  812, 4591,  424, 3879, 4427,  427,  424,
        3040, 1408,  427, 2946, 4790, 2337, 3631, 5061, 2459,  424, 1647, 5129,
        5530, 3427, 3412, 4568,  428, 2222,  428, 4719, 1526, 5530,  683,  609,
        4279, 4790, 2143, 1002, 2222, 3427,  428, 2721, 1010, 5845, 3427, 4790,
         428, 5474, 2597, 1119, 4009, 4790, 3427, 2337, 1526, 1119, 2733, 3427,
        2222, 3057, 5865, 3403,  790, 4009, 3040, 3403], device='cuda:0',
       grad_fn=<NotImplemented>)
the top1 is:  tensor([ 658, 1702,  720, 4528, 5566,  150, 2982, 4170,  658, 515

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 9, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 9, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 9, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 9, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 6, 4, 9, 4, 4, 4, 4, 4, 4, 4, 4, 4, 9, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        9, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([9, 9, 9, 4, 4, 4, 4, 9, 4, 4, 4, 4, 4, 4, 4, 4, 4, 9, 9, 9, 9, 4, 9, 4,
        9, 4, 9, 4, 4, 4, 4, 4, 4, 4, 4, 4, 9, 9, 9, 9, 9, 4, 4, 9, 4, 4, 9, 9,
        9, 4, 4, 4, 9, 4, 9, 9, 4, 9, 9, 4, 9, 9, 9, 4, 4, 4, 9, 4, 4, 4, 4, 4,
        4, 9, 4, 9, 4, 4, 4, 9, 4, 4, 9, 4, 6, 9, 4, 9, 9, 9, 9, 4, 9, 4, 4, 9,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 9, 4, 9, 9, 9, 4, 4, 9, 4, 9, 4, 4, 9,
        4, 4, 9, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 9, 9, 4, 9, 4, 9, 9, 4, 4, 4, 4, 9, 4, 9, 9, 9, 4, 9, 9, 9, 4, 9, 9,
        4, 4, 4, 9, 4, 9, 9, 4, 9, 9, 4, 9, 4, 9, 4, 4, 4, 4, 4, 4, 4, 4, 4, 9,
        9, 4, 9, 9, 4, 4, 4, 9, 4, 4, 4, 9, 9, 4, 9, 4, 9, 9, 4, 9, 4, 4, 9, 4,
        9, 4, 9, 9, 4, 9, 4, 9, 4, 4, 9, 4, 4, 9, 9, 4, 9, 4, 9, 4, 9, 9, 4, 9,
        9, 4, 4, 9, 9, 4, 4, 9, 9, 4, 4, 4, 9, 9, 9, 4, 9, 4, 9, 9, 9, 9, 9, 4,
        9, 9, 4, 9, 9, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([9, 9, 4, 9, 9, 9, 9, 9, 9, 4, 9, 9, 4, 4, 4, 9, 9, 9, 9, 9, 4, 4, 9, 9,
        9, 9, 9, 4, 9, 9, 9, 9, 9, 4, 4, 9, 9, 4, 9, 9, 9, 4, 4, 9, 9, 9, 4, 9,
        4, 9, 9, 9, 4, 9, 4, 9, 9, 4, 4, 9, 4, 9, 9, 4, 9, 4, 4, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 4, 9, 9, 9, 4, 4, 9, 9, 9, 9, 9, 9,
        9, 4, 9, 4, 9, 9, 9, 9, 9, 4, 9, 4, 9, 4, 9, 9, 4, 9, 9, 4, 9, 9, 4, 4,
        9, 9, 9, 4, 9, 9, 9, 9], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([9, 4, 4, 4, 9, 9, 9, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 4, 4, 9, 9, 9, 9, 4,
        9, 9, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 4, 4, 9,
        9, 4, 4, 4, 4, 4, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9,
        9, 4, 9, 9, 9, 9, 9, 9, 9, 4, 9, 9, 9, 9, 4, 4, 9, 9, 4, 4, 4, 9, 9, 9,
        9, 4, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 4, 4, 4, 4, 9, 9, 4, 9, 4, 9, 4, 9,
        4, 9, 9, 9, 4, 9, 4, 9], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([9, 9, 4, 4, 9, 9, 9, 9, 4, 4, 9, 9, 4, 4, 9, 9, 4, 4, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 4, 4, 4, 9, 4, 9, 9, 4, 9, 4, 9, 9, 9, 9, 9, 9,
        9, 4, 4, 9, 9, 9, 9, 9, 9, 9, 4, 4, 9, 9, 9, 9, 9, 4, 9, 9, 4, 4, 9, 9,
        9, 4, 9, 9, 4, 4, 9, 9, 9, 9, 9, 4, 9, 9, 4, 9, 4, 9, 9, 4, 9, 9, 4, 4,
        9, 9, 9, 9, 4, 9, 4, 9, 4, 4, 9, 9, 9, 4, 4, 4, 9, 4, 4, 9, 9, 4, 4, 9,
        9, 9, 4, 9, 4, 4, 9, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 9, 9, 4, 4, 4, 9, 9, 9, 9, 9, 4, 9, 4, 9, 9, 9, 9, 4, 9, 9, 4, 9, 9,
        4, 9, 9, 9, 9, 4, 4, 4, 4, 9, 4, 9, 9, 4, 9, 9, 4, 9, 4, 9, 9, 4, 4, 9,
        4, 9, 9, 4, 4, 9, 4, 4, 4, 9, 9, 4, 9, 9, 4, 9, 4, 9, 4, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 4, 9, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 4, 4, 9, 4, 4, 4, 4,
        4, 4, 4, 4, 9, 9, 9, 9, 9, 9, 4, 4, 9, 4, 9, 9, 4, 9, 9, 9, 9, 9, 4, 4,
        9, 4, 4, 4, 4, 4, 9, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([9, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 4, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([9, 9, 9, 4, 9, 4, 9, 9, 4, 4, 9, 9, 9, 4, 9, 4, 9, 4, 9, 9, 9, 4, 4, 4,
        9, 4, 4, 9, 4, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 4, 9, 4, 9, 9, 9, 9, 9, 9,
        4, 9, 9, 9, 4, 9, 4, 4, 9, 4, 9, 9, 9, 9, 4, 9, 9, 9, 9, 9, 4, 4, 9, 9,
        4, 9, 9, 4, 9, 4, 4, 4, 4, 4, 9, 4, 4, 4, 9, 4, 4, 9, 9, 9, 4, 4, 4, 9,
        9, 4, 9, 4, 9, 9, 9, 4, 9, 4, 9, 9, 9, 9, 9, 9, 9, 4, 4, 4, 9, 9, 4, 9,
        4, 9, 9, 9, 9, 9, 9, 9], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 4, 9, 9, 9, 9, 9, 4, 4, 9, 9, 9,
        9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 4, 9, 4, 9, 9, 4, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 4, 9, 4, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 4, 4, 9, 4,
        9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 4, 9, 9, 4, 9, 4, 4, 9, 9, 4, 9, 9, 9, 9, 4, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 6], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([9, 9, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 4, 9, 9, 9, 4, 9, 9, 4, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 4, 4, 9, 9, 9, 4, 9, 9, 9, 9,
        9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9,
        9, 9, 4, 9, 9, 9, 9, 9], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 9, 4, 9, 4, 4, 9, 4, 4, 9, 4, 4, 4, 4, 4, 4, 4, 9, 4, 4, 4, 9, 4, 4,
        4, 9, 4, 9, 9, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 6, 4, 9, 9, 9, 4, 4,
        4, 4, 4, 9, 4, 9, 6, 4, 4, 4, 4, 9, 9, 4, 9, 6, 9, 4, 4, 9, 4, 4, 4, 4,
        9, 9, 4, 4, 4, 4, 9, 4, 9, 6, 4, 4, 4, 4, 4, 4, 9, 4, 4, 4, 4, 4, 9, 9,
        4, 9, 9, 4, 4, 4, 9, 4, 4, 9, 4, 9, 9, 4, 4, 4, 9, 6, 9, 4, 9, 4, 4, 9,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([9, 4, 9, 9, 9, 9, 9, 9, 9, 4, 4, 9, 9, 9, 4, 9, 4, 9, 9, 9, 9, 9, 4, 9,
        4, 4, 9, 4, 9, 9, 4, 9, 9, 4, 9, 9, 9, 4, 4, 9, 9, 9, 9, 4, 9, 4, 9, 4,
        4, 9, 4, 9, 9, 9, 9, 4, 4, 9, 9, 9, 4, 4, 9, 4, 9, 9, 9, 9, 4, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 4, 9, 9, 4, 4, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 4, 9, 4, 9, 9, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 4, 9, 4, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 9, 9, 4, 4, 4, 9, 4, 9, 6, 9, 9, 4, 4, 4, 4, 4, 4, 4, 9, 4, 4, 9, 9,
        6, 9, 9, 4, 9, 4, 9, 4, 4, 9, 4, 4, 4, 9, 4, 9, 4, 9, 9, 4, 9, 4, 9, 9,
        9, 9, 9, 9, 6, 9, 9, 4, 9, 9, 4, 9, 6, 4, 9, 9, 9, 9, 4, 9, 4, 4, 4, 9,
        9, 9, 9, 9, 9, 9, 9, 4, 4, 4, 4, 9, 9, 4, 9, 4, 4, 4, 4, 9, 9, 4, 9, 9,
        4, 4, 6, 9, 4, 9, 4, 9, 4, 4, 9, 9, 9, 4, 9, 4, 4, 4, 4, 9, 9, 4, 9, 9,
        4, 9, 9, 9, 4, 9, 9, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([9, 9, 9, 9, 4, 9, 9, 9, 9, 9, 4, 4, 4, 9, 4, 9, 4, 4, 4, 9, 9, 4, 9, 9,
        9, 9, 9, 4, 4, 9, 9, 9, 9, 4, 4, 9, 4, 9, 9, 4, 4, 4, 9, 9, 4, 9, 4, 4,
        9, 9, 9, 4, 4, 4, 9, 4, 9, 9, 9, 4, 4, 9, 4, 4, 9, 9, 4, 9, 4, 4, 9, 9,
        9, 4, 9, 9, 4, 9, 9, 9, 9, 4, 9, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 4, 9, 9,
        9, 9, 4, 9, 9, 4, 9, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 4, 4, 9, 4, 9, 9,
        9, 9, 9, 4, 4, 9, 4, 9], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([9, 9, 9, 4, 9, 4, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 4, 4, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 4, 9, 4, 9, 9, 4, 9, 9, 9, 9, 9, 9, 4, 9, 9, 9, 9, 4, 9, 9, 9,
        9, 9, 9, 9, 9, 4, 9, 4, 9, 4, 4, 9, 4, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9,
        4, 9, 9, 4, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 4, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 4, 9, 9, 4,
        4, 9, 9, 4, 9, 9, 4, 9], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([9, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 4, 9, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 4, 4, 9, 9, 9, 9, 4, 4, 9, 9, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 4,
        9, 9, 4, 9, 9, 9, 9, 9, 9, 4, 9, 4, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9,
        4, 9, 9, 4, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9,
        4, 9, 9, 9, 9, 9, 9, 9], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 4, 9, 9, 6, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 4, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 4, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 4, 9, 4, 9, 9, 9, 9, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 4, 9, 9], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 4, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 4, 9, 9, 9, 9, 9], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 4,
        9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 4, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 4,
        4, 4, 9, 9, 4, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 4, 9, 9, 9, 9, 9, 4,
        9, 9, 9, 9, 9, 9, 4, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 4, 4, 9, 4, 9, 9, 9, 4, 9, 9,
        9, 4, 9, 9, 9, 9, 9, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9,  9,  4,  4,  9,  9,  9,  9,  9,  9,  9,  9,  9,  4,  9,  9,  9,  4,
         9,  9,  4,  9,  4,  4,  9,  9,  4,  4,  9,  9,  9,  9,  9,  4,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  4,  9,  4,  9,  4,  9,  9,  9,  9,  9,  4,
         9,  9,  9,  9,  9,  9,  9,  9,  4,  9,  4,  9,  9,  4,  4,  4,  4, 30,
         4,  9,  9,  9,  9,  9, 17,  9,  9,  9,  9,  9,  4,  9,  9, 30,  9,  9,
         9,  4,  9,  9,  4,  4,  9,  9, 30,  9,  9,  9,  9,  9,  9,  4,  4,  4,
         9,  9, 

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 4, 4, 4, 9, 9, 9, 9, 9, 9, 9, 9, 4, 9, 4,
        9, 9, 9, 4, 4, 9, 9, 9, 9, 9, 9, 9, 4, 9, 4, 9, 4, 9, 9, 9, 9, 9, 4, 4,
        9, 4, 4, 9, 9, 6, 6, 9, 9, 6, 4, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 6, 4, 9, 9, 4, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 4,
        9, 4, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 4, 9, 4, 4, 9, 9, 9, 4, 9, 9, 4, 9,
        9, 9, 6, 9, 9, 9, 9, 9], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([9, 4, 6, 9, 9, 9, 9, 9, 9, 9, 4, 4, 6, 9, 9, 9, 9, 9, 9, 9, 9, 4, 9, 9,
        9, 4, 9, 4, 4, 9, 4, 9, 9, 4, 9, 9, 4, 9, 9, 9, 9, 4, 9, 9, 9, 9, 9, 4,
        9, 9, 9, 4, 9, 9, 6, 6, 9, 9, 9, 9, 9, 4, 9, 4, 9, 9, 9, 4, 9, 4, 4, 9,
        9, 9, 9, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 4, 9, 9, 9, 9, 4, 9, 9, 9,
        9, 4, 9, 9, 9, 4, 9, 4, 9, 9, 4, 4, 9, 9, 9, 9, 9, 4, 6, 9, 9, 9, 9, 9,
        9, 9, 9, 4, 9, 9, 9, 9], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 4,  9,  9,  9,  9,  9,  4,  4,  9,  4,  9,  9,  9,  9,  6,  9,  9,  9,
         9,  9,  9,  9,  6,  9,  4,  9,  9,  9,  4,  9,  9,  9,  9,  9,  9,  6,
         9,  4,  9,  9, 19,  9,  4,  9,  9,  9,  9,  9,  9,  4,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  6,  9,  4,  9,  9,  9,  9,
         9,  4,  9,  9,  9,  4,  9,  9,  9,  9,  9,  4,  9,  4,  9,  9,  9,  9,
         9,  9,  9,  9,  6,  9,  9,  9,  9,  9,  4,  9,  9,  9,  9,  9,  9,  6,
         9,  9, 

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 6, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        6, 9, 9, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 6, 9, 4, 9, 9, 4, 9, 9, 6, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 4, 9, 9, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 4, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 6, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 6, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 4, 4, 9, 6, 9, 9,
        4, 9, 9, 9, 9, 9, 9, 9], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  6,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9, 11,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  4,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  4,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9, 

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 4, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 4, 9, 9, 9,
        9, 9, 9, 9, 9, 6, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 4, 9, 9, 9, 9, 9, 4,
        9, 9, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 4, 9, 9, 9, 4, 9, 4,
        9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 6, 4, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 4, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 4, 9, 9, 9, 9, 9, 9], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 6,  9,  9,  9,  9,  9, 19,  9,  9,  9,  9,  9,  6,  9,  4,  9,  9,  6,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  4,  9,  9,  9,
         9,  9,  9,  9, 19,  9,  9,  9,  9,  6,  4, 19,  9,  9,  9,  9,  4,  4,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  4,  9,  9,  9,  9,
         9,  6,  9,  4,  9,  9,  9, 19,  9,  9,  6,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  4,  9,  9,  9,  9,  9,  9, 19,  4,  9,  9,  9,  4,
         4,  9, 

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  4,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9, 19,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9, 

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9,  9,  6,  9,  9,  6,  9,  9,  9,  9,  6,  9, 30,  6,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9, 30,  4,  6,  9,  6,  4,  4,
         9,  9,  9,  9,  4,  9,  9,  9,  9,  9,  9,  9,  4,  9,  6,  4,  9,  9,
         9,  4, 19, 19,  9, 30,  9,  9,  4, 30,  9,  9,  9,  9, 30,  9,  9,  9,
         4,  4,  9,  9,  9,  9,  4,  9,  9,  9,  9,  9,  9, 19,  9,  9,  4,  9,
         4, 30,  9,  9,  9,  9,  9,  4,  4,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9, 30, 

the top1 is:  tensor([16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([19,  9,  9,  9, 19,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9, 19,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9, 19,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9, 19,  9,  4,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9, 19,  9,  9,  9,
         9,  9,  9,  9,  9,  9, 

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([30,  9,  9,  9, 17,  9,  9,  9,  9, 19, 30,  9,  4,  9, 30, 30,  9,  9,
         9, 19,  9, 30,  9,  9,  9,  9,  9,  9,  9, 19,  9,  9,  9, 30, 30, 30,
         9,  9, 17,  9,  9,  9,  9, 19,  9,  9,  9, 30, 19,  9,  9,  9,  9,  9,
         9,  9, 30,  9,  9,  9, 17,  9,  9,  9,  9,  9, 19,  9, 19, 30,  9, 30,
         9,  9,  9,  9,  9,  9,  9,  9, 19,  9,  9, 19,  9,  9,  9,  9,  4,  9,
        19,  6,  9,  9,  9,  9,  9,  9,  9, 17,  4,  9,  4, 19,  9,  9, 30, 30,
         9,  9, 

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
        19,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9, 19,  9,  9,  9,
         9,  9,  9, 19,  9,  9,  9,  9,  9, 19,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9, 19,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9, 19,  9,  9,
         9,  9, 

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9,  9,  9,  9,  9, 24,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9, 19,  9,  9,  9,  9, 19,  9, 19,  9,  9,  9,  9,  9,
        19,  9,  9,  9,  9,  9,  9,  9,  9, 19,  9,  9,  9, 19,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9, 19,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9, 19,  9,  9, 19,  9, 19,  9,  9,  9,  9,
         9,  9, 

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9,  9,  9,  9,  9,  9,  9,  9,  9, 19,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9, 24,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9, 24,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9, 19,  9,  9,  9,  9,  9,
        24,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9, 24,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9, 

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9,  9,  9,  9,  9,  9,  9,  9,  9,  9, 24,  9, 24,  9,  9,  9,  9,  9,
        24,  9,  9,  9,  9,  9,  9,  9,  9, 24,  9,  9,  9,  9,  9,  9,  9, 24,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9, 24,  9,  9,
        24,  9,  9,  9,  9, 24,  9, 24, 24,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9, 19,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9, 19,  9,  9, 24,  9, 19,  9,  9,  9,  9,  9,  9,  9,  9, 24,  9,
         9, 24, 

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  4, 30,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  4,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9, 30,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  6,  9,  9,  9,  9,  4,  9,  9,  9,  4,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9, 

the top1 is:  tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9, 30,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         4,  9,  9,  9,  9,  9,  9,  4,  9,  9,  9,  9,  9,  9,  9,  9,  9,  6,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  4,  9,  9,  9,  9,  6,  9,
         9,  9, 

        3, 3, 3, 3, 3, 3, 3, 3], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9, 19,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9, 

the top1 is:  tensor([ 4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9, 

the top1 is:  tensor([ 4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9, 19,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9, 

the top1 is:  tensor([ 4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4, 16,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9, 19,  9, 19,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9, 

the top1 is:  tensor([ 4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
        16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4, 16,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([19,  9,  9,  9, 19,  9,  4,  9,  9,  9,  9,  9, 19,  9,  9,  6,  9, 19,
        19,  9,  9,  9,  9,  9,  9,  6,  9,  9, 35,  9,  9, 19,  9,  9,  4,  9,
         9, 19, 17,  9,  9,  9,  9, 19, 19,  9, 19,  4, 19,  9,  9,  9,  9,  9,
         9,  9, 38,  9,  9,  9,  9,  9,  9,  9,  6, 35,  9,  9, 19,  9,  9, 35,
         9,  9,  9, 38,  9,  9, 

the top1 is:  tensor([ 4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4, 16,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9,  9,  9,  9,  9, 19,  9,  9,  9,  9,  9,  9,  9,  9, 38,  9,  9,  9,
         9, 38,  9,  9,  9,  9,  9,  9,  9, 19,  9,  9,  9,  9,  9,  9, 19,  9,
         9,  9, 38,  9,  9,  9,  9,  9,  9,  9,  9,  9, 38,  9,  9, 38,  9,  9,
         9,  9, 38, 38, 38,  9,  9,  9, 38, 38,  9,  9,  9,  9,  9,  9,  9, 19,
         9,  9,  9,  9,  9,  9, 

the top1 is:  tensor([ 4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9,  9,  9,  4,  6,  9, 38,  9,  9,  6,  4,  9,  9,  9, 19,  9,  9,  9,
         4,  9,  6,  9,  9,  9,  4,  9,  9,  9,  9,  9,  9, 19,  6,  9,  9,  9,
        19,  6,  4,  4,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9, 22,  9, 19,
         9,  9,  4,  9, 19,  9,  9,  9, 38,  4,  9,  9, 19,  9,  9,  9,  9,  9,
         4,  9,  9,  9,  9,  9, 

the top1 is:  tensor([ 4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4, 16,
         4,  4, 16,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9,  9,  9, 30,  9,  9, 19,  6,  9,  9,  9,  9,  9,  4,  9,  9,  9,  9,
         9, 19,  9, 12,  9,  9, 30,  4,  9,  4,  9,  9,  9, 35,  9,  9, 38, 35,
         9,  9, 19,  9,  9,  9,  9, 38,  9,  9, 17,  9,  9,  6, 12,  9,  9,  4,
         4,  9,  6,  9,  9,  9,  9,  9,  9,  9,  9,  9, 19,  9,  9,  9,  9,  6,
         9,  9,  9,  9,  9,  9, 

the top1 is:  tensor([ 4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9,  9,  9,  9,  9,  9,  9, 38,  9,  9,  9,  9,  9,  9,  9,  9, 38,  9,
        38,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9, 38,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9, 38, 38,  9, 

the top1 is:  tensor([ 4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9,  9,  9,  9,  9,  4,  9,  9,  9,  9,  9, 30,  9,  9,  9, 30,  9,  9,
         9,  4,  9,  4, 38,  9,  9,  9,  4,  9,  9, 30,  9, 17,  9, 30,  9,  9,
         9,  4,  4,  9,  4, 30,  9,  9,  9,  9,  9,  9,  9,  9,  9, 19,  9,  9,
        17,  9,  9,  9,  9,  9,  4,  9,  9, 19, 19,  9,  9,  9,  9,  9,  9,  9,
         9, 30,  9,  9,  9,  9, 

the top1 is:  tensor([ 4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9,  9,  9,  9,  9,  4,  9,  9,  9, 30,  9,  9,  4,  9, 30,  9, 30,  9,
        38, 30, 30, 30,  9,  9,  9,  9, 30,  9, 30,  9,  9,  9,  9,  9,  9,  6,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9, 30,  9, 30,  9,  9,  9,  6,  9,
         9,  9,  9,  9,  9,  9,  6,  9,  9, 30,  9,  4,  4, 30, 30,  4, 30,  6,
         4, 30,  9,  6,  9, 30, 

the top1 is:  tensor([ 4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,
         4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4, 16,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,
         4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9, 30,  9,
         9,  9,  9,  9,  9, 30,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9, 38,
         9,  9,  9,  9, 30,  9,  9,  9,  9,  9,  9,  9,  9,  9, 30,  9,  9, 30,
         9,  9,  9,  9,  9,  9, 

the top1 is:  tensor([ 4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4, 16,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
        16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4, 16,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9, 30,  9,  9,  9,  9,  9,  9,  9,  9, 30,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
        30,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9, 

the top1 is:  tensor([ 4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,
         4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9,  9,  9,  9,  9,  9, 30,  9,  9, 30,  9,  9,  9,  9,  9, 30,  9, 30,
         9,  9, 30,  9,  9, 30,  9,  9,  9,  9,  9,  9,  9,  9, 17,  9, 30,  9,
         9,  4,  9, 17, 30,  9,  9,  9, 30,  9, 30, 30,  9,  9,  9,  9,  9,  9,
         4,  9,  9,  9,  9, 30,  9,  9,  9,  9,  6, 30, 30,  9,  9,  9,  9,  9,
        30,  9,  9,  9,  6,  9, 

the top1 is:  tensor([ 4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4, 16,  4,
         4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9,  9, 30,  9,  9,  9,  9,  9,  9,  9,  9,  9, 30,  9,  9,  9, 30,  9,
         9,  9,  9,  9, 30,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9, 

the top1 is:  tensor([16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,
         4,  4,  4,  4,  4,  4,  4, 16, 16,  4,  4, 16,  4,  4,  4, 16, 16,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4, 16,  4,  4,  4, 16,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([30,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  4, 30,  9,  9,  9,
         9, 30,  9,  9,  9,  9,  9,  9,  9, 30,  9,  9,  9,  4,  9,  9,  9, 30,
         9,  9,  9,  9,  9,  9,  9, 30, 30, 30,  9,  4,  9,  6,  9, 19, 17,  9,
         9,  6,  4,  9,  9,  9,  9,  4,  9,  9,  9,  9, 30,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9, 

the top1 is:  tensor([ 4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4, 16,  4, 16, 16,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4,
         4,  4, 16,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4,  4,  4, 16,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9,  9,  9, 30,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9, 30,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9, 30,  9, 30, 30,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9, 30,  9,  9,  9,  9,  9, 30,  9,
         9,  9, 30,  9,  9, 30,  9,  9,  9,  9,  9, 30,  9,  9,  9, 30,  9,  9,
         9,  9,  9,  9,  9,  9, 

the top1 is:  tensor([ 4, 16,  4,  4,  4,  4,  4, 16,  4,  4, 16, 16,  4, 16,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4, 16,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,
         4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,
         4,  4, 16,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4, 16,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16, 16,  4, 16,  4,  4,  4,
        16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9, 30,  9,  9,  9,  9,  9, 30,  9,  9, 30, 30,  9, 30,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9, 19,  9,  9,  9, 30,  9,  9,  9,  9,  9,  9,  9,
         9, 30,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9, 30,  9,
         9,  9, 30,  9,  9,  9,  9,  9, 30,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9, 30,  9,  9, 30, 

the top1 is:  tensor([ 4,  4, 16, 16,  4, 16,  4,  4, 16,  4,  4,  4,  4,  4,  4, 16,  4,  4,
        16,  4, 16,  4,  4,  4, 16,  4,  4, 16,  4, 16,  4,  4,  4,  4,  4,  4,
         4,  4,  4, 16,  4,  4,  4, 16, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,
        16,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16,
         4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4, 16,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4, 16,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
        16,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4, 16], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9,  9, 19, 19,  9, 19,  9,  9, 19,  9,  9,  9,  9,  9,  9, 19,  9,  9,
        19,  9, 19,  9,  9,  9, 30,  9,  9, 19,  9, 19,  9,  9,  9,  9,  9,  9,
         9,  9,  9, 19,  9,  9,  9, 30, 19,  9,  9,  9,  9,  9,  9,  9,  9,  9,
        30,  9,  9,  9,  9,  9, 19,  9,  9,  9,  9, 19,  9,  9,  9,  9,  9, 19,
         9,  9,  9,  9,  9,  9, 

the top1 is:  tensor([ 4,  4,  4, 16,  4, 16,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,
        16, 16,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4, 16,  4,  4,  4,
         4,  4, 16, 16, 16,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4, 16,  4,  4,
        16,  4, 16, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4, 16, 16,  4, 16,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4, 16,  4,
        16,  4,  4,  4,  4, 16,  4,  4, 16, 16,  4,  4,  4,  4,  4,  4,  4,  4,
        16,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([  9,   9,   9,  19,   9,  19,  19,   9,   9,   9,   9,   9,  19,   9,
          9,   9,   9,   9,  19,  17,  30,  19,   9,   9,   9,   9,   9,   9,
          9,   9,  19,   9,  19,   9,   6,   9,   9,   9,  19,  19,  19,   9,
          9,   9,   9,   9,   9,  19,   9,   9,   9,  17,   9,   9,  17,   9,
         19,  19,   9,   9,   9,   9,   

the top1 is:  tensor([ 4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,
        16, 16,  4,  4,  4, 16, 16, 16,  4,  4,  4,  4,  4,  4, 16, 16,  4, 16,
        16, 16,  4,  4, 16,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16, 16,  4,  4,
         4, 16,  4,  4, 16,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,
         4,  4, 16,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,
         4,  4,  4,  4, 16, 16,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16,
         4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4, 16,  4, 16,  4,
         4, 16], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9,  9, 30,  9,  9, 38, 19,  9,  9,  9,  9,  9, 19,  9, 17,  9, 38, 19,
        19, 19,  9, 30,  9, 19, 38, 19,  9,  9,  9,  4,  9,  9, 19, 19,  9, 19,
        19, 19,  9,  9, 19,  9,  9,  9, 19, 38, 19,  9,  9,  9, 19, 19,  9,  9,
         9, 19,  9,  9, 19,  9, 19, 19, 17,  9,  9,  9,  9,  9,  9,  9, 19,  9,
         9,  9, 19,  9, 19, 17, 

the top1 is:  tensor([5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([ 4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4, 16,  4,  4,  4,  4,
        16,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,
         4, 16,  4,  4,  4,  4, 16, 16, 16,  4,  4,  4,  4, 16, 16,  4, 16,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4, 16,  4, 16,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([38,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9, 19,  9, 19,  9,  9,  9,  9,
        19,  9,  9, 19,  9, 38,  9,  9,  9,  9,  9,  9,  9,  9, 19,  9,  9,  9,
         9, 19,  9,  9,  9,  9, 19, 19, 19,  9,  9,  9,  9, 19, 19,  9, 19,  9,
         9,  9,  9, 38,  9,  9,  9,  9,  9,  9,  9,  9, 19,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9, 

the top1 is:  tensor([ 4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4, 16,  4,
         4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4, 16,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4, 16, 16,  4,  4,
         4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4, 16, 16,  4,  4,
         4,  4, 16, 16,  4,  4,  4,  4, 16, 16,  4,  4,  4,  4, 16,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9, 38, 19, 38,  9,  9,  9,  9,  9,  9,  9,  9, 19, 19,  9,  9, 19,  9,
         9,  9, 19,  9,  9,  4,  6,  9, 17,  9,  9, 19,  6, 19, 38, 19,  9, 38,
         9,  9,  9,  9,  9,  9,  9,  9,  9, 38,  9,  6, 19,  9, 38, 19, 19,  9,
         9, 19, 19,  9,  9,  9, 19,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9, 38,  6, 19, 

the top1 is:  tensor([ 4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4, 16,  4,
         4,  4, 16,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4,
         4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4, 16,  4, 16, 16,  4,
         4,  4,  4,  4,  4, 16,  4, 16,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,
        16,  4,  4,  4,  4, 16, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([19,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9, 19,  9, 19, 38, 38, 17,  4,
         9,  4, 17,  9,  9, 17,  9,  9,  9, 19,  9,  9, 19,  9,  9,  9, 19,  9,
         9,  6, 19,  9,  9,  9,  9,  9,  9,  9,  9,  4,  9,  9,  9,  9, 38,  9,
        38,  9, 19,  9,  9,  9,  4,  9,  9, 30, 19,  9,  9,  9,  9,  9, 17,  9,
        38,  9,  9,  9, 19,  9, 

the top1 is:  tensor([ 4,  4, 16, 16, 16,  4,  4, 16, 16, 16,  4,  4,  4, 16,  4, 16,  4,  4,
         4, 16,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4,  4,  4, 16,  4, 16,  4,
         4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4, 16,  4,  4,  4,
         4,  4,  4,  4,  4, 16,  4,  4,  4,  4, 16,  4,  4,  4, 16,  4,  4, 16,
         4,  4,  4,  4,  4,  4,  4,  4, 16,  4, 16,  4,  4,  4,  4, 16,  4,  4,
         4,  4,  4,  4, 16, 16,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4, 16,
         4,  4, 16,  4, 16,  4, 16,  4,  4,  4,  4, 16, 16,  4, 16,  4,  4, 16,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9, 38, 19, 19, 19, 19,  9, 19, 19, 19,  9,  9,  9, 19,  9, 19,  6,  9,
         9, 17,  9,  9, 19, 19,  9,  9,  9, 19, 19, 30,  9,  9, 19,  9, 19,  9,
         9,  9, 38, 19,  9, 38,  9,  9,  9, 19,  4,  4, 19,  9, 17,  9,  9,  9,
         9, 19,  9,  9,  9, 19,  9,  9,  9,  9, 17, 19,  9,  9, 19,  9,  9, 17,
         9,  9,  9, 38,  9, 38, 

the top1 is:  tensor([ 4,  4,  4, 16,  4, 16,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,
        16, 16,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4, 16,  4,  4,  4,  4,
         4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4, 16, 16,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,
        16,  4,  4,  4,  4,  4, 16,  4, 16,  4,  4, 16,  4, 16,  4, 16,  4,  4,
         4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4, 16,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,
        16,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([38,  9,  9, 30,  9, 30,  9,  9,  9,  9,  9, 30,  9,  9,  9,  9, 38, 38,
        30, 30,  9,  9,  9,  9,  9,  9, 30,  9, 38, 38,  9, 30,  9,  9,  9,  9,
         9,  9,  9,  9,  9, 30,  9,  9,  9, 38,  9,  9,  9,  9,  9,  9,  9,  9,
         9, 19, 30,  9,  9, 19,  9,  9,  9,  9, 38,  9,  9,  9,  9, 30,  9,  9,
        30,  9,  9,  9,  9,  9, 

the top1 is:  tensor([ 4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4, 16,  4,  4,  4,  4, 16,  4,
         4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4, 16,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4, 16,  4, 16,
         4,  4,  4,  4,  4,  4, 16, 16,  4, 16,  4,  4,  4,  4,  4,  4, 16,  4,
         4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4, 16,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4, 16, 16,  4,  4,
         4,  4,  4, 16, 16,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4, 16,  4,
        16,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9,  9,  9,  9,  9,  9,  9,  9, 30,  9,  9, 30,  9,  9,  9,  9, 30,  9,
         9,  9,  9,  9,  9,  9, 30,  9,  9,  9,  9,  9,  9,  9,  9,  9, 38,  9,
         9,  9,  9, 30,  9,  9,  9,  9,  9,  9, 30,  9,  9,  9, 38, 30,  9, 30,
         9,  9,  9,  9,  9,  9, 30, 30,  9, 30,  9,  9,  9,  9,  9,  9, 30,  9,
         9, 30,  9,  9, 38,  9, 

the top1 is:  tensor([ 4, 16,  4,  4,  4,  4, 16, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4, 16, 16,  4,  4,  4,  4, 16, 16,  4,  4,  4,  4,  4, 16, 16,  4,  4,
         4,  4, 16,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,
         4, 16,  4,  4,  4, 16,  4, 16,  4, 16, 16, 16,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4, 16, 16,  4, 16,  4, 16,  4,  4, 16,  4,  4, 16,
        16, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([  9,  19,   9,   9,   9,   9,  38,  17,   9,   9,   9,   9,   9,   9,
          9, 115,  38,   9,   9,  30,  30,   9,   9,   9,   9,   9,  30,   9,
          9,   9,   9,  30,  30,  30,   9,   9,   9,   9,  19,   9, 115,   9,
          9,   9,   9,   9,  30,   9,   9,   9,   9,   9,   9,   9,   9,  19,
          4,  38,   9,  19,   9,  30,   

the top1 is:  tensor([ 4, 16,  4,  4, 16, 16,  4,  4, 16,  4,  4, 16, 16, 16,  4,  4, 16,  4,
         4, 16,  4, 16, 16,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4, 16, 16,  4,
         4,  4,  4,  4,  4,  4,  4, 16,  4, 16,  4,  4,  4,  4, 16, 16, 16,  4,
         4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16, 16,  4,
        16, 16,  4,  4,  4, 16, 16, 16,  4, 16,  4,  4, 16,  4,  4,  4,  4,  4,
         4,  4, 16,  4, 16, 16,  4,  4,  4, 16,  4,  4,  4,  4, 16,  4,  4,  4,
         4,  4,  4,  4,  4, 16, 16,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4,
        16,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([115,  17,   9,   9,  38,  30,   9,   9,  30,   9,   9,  30,  30,  19,
          9,   9,  30,   9,   9,  17,   9,  30,  17,   9,   9,  30,   9,   9,
          9,   9,   9,   6,   6,  19,  17,   9,  30,   4,   9,  38,   9,   9,
          9,  30,   9,  30,   9,   9,   9,   9,  30,  30,  30,   9,  38,  30,
          9,   9,   9,   9,   9,   9,   

the top1 is:  tensor([ 4,  4,  4,  4,  4,  4,  4, 16, 16,  4,  4, 16,  4, 16,  4, 16,  4, 16,
        16,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,
         4,  4,  4,  4, 16,  4,  4, 16,  4,  4,  4,  4, 16,  4,  4, 16,  4,  4,
         4,  4,  4, 16,  4, 16,  4,  4,  4,  4,  4,  4, 16, 16, 16,  4,  4, 16,
         4,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4,  4, 16, 16,  4,  4,  4, 16,
         4,  4,  4, 16,  4,  4,  4,  4, 16, 16, 16,  4,  4, 16,  4,  4,  4, 16,
         4,  4,  4, 16,  4, 16,  4, 16,  4, 16,  4, 16,  4,  4,  4,  4,  4, 16,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([  9,  30,   9,   9,   9,   9,   9,  17,  30,  38,   9,  30,  38,  30,
          9,  30,   9,  30,  30,   9,   9,  30,   6,   9, 115,  38, 115,   9,
          9,   9,   9,  30,   9,   9,   9,   9,   9,   9,   9,   6,  17,   9,
          9,  17,   9,   9,   9,   9,  30,   9,  22,  30,  38,   6,   9,   9,
          9,  30,   9,   4,   9,   6,   

the top1 is:  tensor([ 4,  4,  4, 16,  4, 16,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4, 16,  4,
         4,  4,  4,  4,  4, 16,  4,  4,  4,  4, 16,  4,  4,  4,  4, 16,  4,  4,
         4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4,  4, 16, 16, 16,
         4,  4,  4,  4, 16,  4,  4, 16,  4, 16,  4, 16,  4,  4,  4, 16, 16, 16,
         4,  4,  4,  4,  4, 16,  4,  4,  4,  4, 16,  4,  4,  4,  4, 16,  4,  4,
         4,  4, 16,  4,  4,  4,  4, 16,  4, 16,  4,  4, 16,  4,  4, 16,  4,  4,
         4,  4,  4,  4,  4,  4, 16,  4,  4, 16,  4, 16,  4,  4,  4,  4, 16,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([  9,   9, 115,  38,   9,  30,   9,  30,   9,   9,   9,   9,   9,  30,
          9,  38,  30,  30,   9,   6,   9,   9,   9,  30,   9,   9,   9,   9,
         30,   9,   9,   9,   9,  17,   9,   9,   9,   9,   9, 115,   9,   9,
         30,   9,   9,  17, 115,  12,  30,   9,   9,  30,  30,  30,   9,   9,
          9,   9,  30,   9,   9,  30,   

the top1 is:  tensor([ 4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16, 16,  4,  4,  4,
         4,  4,  4,  4,  4, 16,  4,  4,  4,  4, 16, 16,  4,  4,  4, 16,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16, 16, 16,  4, 16,  4,  4,  4,
         4,  4,  4, 16,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4,
         4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4,  4, 16, 16, 16,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4, 16,  4,
         4,  4,  4,  4,  4, 16, 16, 16, 16,  4,  4,  4,  4, 16,  4,  4,  4,  4,
        16,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([  9,   9,   9,   9,   6,   9,   9,  30,   9,   9, 115, 115,   9,  30,
         30,   9,   9,   9,   9,   9,   9,   9, 115,  30,   9,   9,   9,   9,
         30,  30,   9,   9,   9,  30,   9,   9,   9,   9,   9,   9,   9,   9,
        115,   9,   9,   9,  17,  30,  30,   9,  30,   9,   6,   9,   9,   6,
          9,  30,   9,   9,   9,   9,   

the top1 is:  tensor([ 4, 16, 16,  4,  4, 16,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,
         4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4, 16,  4,  4,  4,  4, 16, 16,  4,  4,  4, 16, 16,  4, 16,
         4,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4,  4, 16,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4, 16, 16,
         4,  4,  4,  4, 16,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4, 16,  4,  4, 16, 16,  4,  4,  4,  4,  4,  4, 16, 16,  4, 16,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([  9,  30,  30,   9, 115,  30,   9,   9,   9,   9,   9,   9,  30,   9,
          9,   9,   9,   9,   9,   9, 115,   9,  17,   9,   9,   9,   9,   9,
          9,   9,   9,   9,   9,  30,   9,   9, 115,   9,   9,   9,  17,  30,
          9, 115,   9,  30,  30,  17,   9,   9,  30,  30,   9,  30,   9,   9,
          9,   9,  30,   9,   9,   9,   

the top1 is:  tensor([ 4,  4,  4,  4, 16,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4, 16,
        16, 16, 16,  4,  4,  4,  4,  4,  4, 16,  4, 16,  4,  4,  4,  4,  4, 16,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4, 16,  4, 16,  4,
         4, 16,  4,  4,  4,  4, 16, 16,  4,  4, 16,  4,  4,  4, 16, 16,  4,  4,
         4,  4, 16,  4,  4,  4,  4,  4,  4, 16,  4,  4, 16, 16,  4,  4, 16,  4,
        16,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4, 16,
        16,  4,  4,  4,  4, 16,  4, 16,  4, 16,  4, 16,  4, 16, 16,  4, 16,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([  9,   9,   9,   9,  30,   9,   9,   9,  30,   9,  30,   9,   9,   9,
          9,   9,   6,  30,  30,  30,  30,  30,   9,   9,   9,   9,   6,  38,
          9,  38,   9,   9, 115,   9,   6,  30,   9,   9,   9,   9,   9,   9,
          9,   9,  38,   9,   9,   9,  30,   9,  30,   9,  30,   9,   9,  30,
          9,   9,   9,   9,  30,  30,   

the top1 is:  tensor([ 4,  4,  4,  4, 16, 16,  4,  4,  4, 16,  4,  4,  4, 16, 16,  4,  4,  4,
         4,  4,  4,  4, 16, 16,  4,  4,  4,  4, 16,  4,  4, 16,  4, 16,  4,  4,
         4,  4,  4,  4,  4, 16, 16, 16,  4,  4, 16,  4,  4,  4,  4, 16,  4, 16,
         4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,
         4,  4, 16,  4,  4,  4, 16,  4,  4, 16, 16,  4,  4,  4,  4, 16,  4,  4,
         4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4, 16,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9,  9,  9,  9, 30, 30,  9,  9,  9, 30,  9,  9,  9, 30, 30,  9,  9,  9,
         9,  9,  9,  9, 30, 30,  9,  9,  9,  9, 30,  9,  9, 30,  9, 30,  9,  9,
         9,  9, 38,  9,  9, 30, 30, 30,  9, 38, 30,  9, 38,  9, 38, 30,  9, 30,
         9,  9,  9, 30,  9,  9,  9,  9,  9,  9,  9, 30,  9,  9,  9, 38,  9,  9,
         9, 38, 30,  9,  9,  9, 

the top1 is:  tensor([ 4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4, 16,
         4,  4, 16,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4,  4, 16,  4, 16,
         4, 16,  4,  4, 16,  4,  4,  4, 16, 16,  4,  4,  4, 16,  4, 16,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,
         4,  4,  4, 16,  4,  4, 16,  4,  4,  4,  4, 16, 16,  4,  4,  4,  4, 16,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,
         4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4, 16, 16,  4,  4, 16,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([  9,   9,   9,   9,   9,  30,   9,   9,   9,   9,   9,   9,   9,   9,
          6,  30,   4,  17,   9,   9,  30,  30,  30,   9,  38,   9,   9,   9,
          9,  17,  17,   9,  17,  30,   9,  38,   9,  38,   9,   9,  30,   9,
          9,   9,  17,  30,   9,   9,   9,  30,   9,  30,   9,  30,  38,   9,
          9,   9,   9,   9,   9,   9,   

the top1 is:  tensor([ 4,  4,  4, 16, 16, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
        16,  4, 16,  4, 16,  4, 16,  4,  4,  4,  4,  4, 16,  4,  4, 16, 16,  4,
         4,  4, 16,  4, 16,  4,  4,  4, 16,  4,  4, 16,  4,  4, 16,  4,  4,  4,
        16,  4,  4,  4,  4,  4,  4, 16,  4, 16,  4,  4,  4,  4,  4,  4, 16,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,
        16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,
         4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4, 16,  4, 16,  4,  4,  4,
        16,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9,  9,  9, 30, 30, 30,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9, 38,
        30,  9, 30, 38, 30,  9, 30,  9,  9,  9,  9,  9, 30,  9,  9, 30, 30,  9,
         9,  9, 30,  9, 30,  9,  9,  9, 30,  9, 38, 30,  9,  9, 30,  9,  9,  9,
        30,  9,  9,  9,  9,  9,  9, 30,  9, 30,  9,  9, 38,  9,  9,  9, 30,  9,
        38,  9,  9,  9,  9, 38, 

the top1 is:  tensor([16,  4,  4, 16, 16,  4,  4,  4,  4, 16,  4,  4, 16,  4, 16,  4, 16,  4,
        16,  4,  4,  4, 16,  4,  4,  4, 16,  4, 16,  4,  4,  4,  4,  4,  4, 16,
         4,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4, 16, 16,  4,  4,  4,  4,  4,  4,  4, 16, 16,  4,  4,  4,  4,  4,
         4,  4,  4, 16,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4, 16,  4, 16,  4,
        16,  4,  4,  4,  4,  4, 16, 16,  4,  4,  4,  4, 16, 16, 16,  4,  4, 16,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16, 16,  4,  4,  4,  4,
        16,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([30,  9,  9, 30, 30,  9, 38,  9,  9, 30,  9,  9, 30,  9, 30,  9, 30,  9,
        30,  9,  9,  9, 30,  9,  9, 38, 30,  9, 30,  9,  9,  9,  9,  9,  9, 30,
         9, 38,  9, 30,  9, 38,  9,  9,  9, 30,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9, 30, 30,  9, 38,  9,  9,  9,  9,  9, 30, 19,  9,  9,  9,  9,  9,
         9,  9,  9, 30,  9,  9, 

the top1 is:  tensor([16,  4,  4,  4, 16,  4, 16, 16,  4,  4,  4,  4,  4,  4,  4, 16, 16,  4,
         4,  4,  4,  4,  4, 16,  4,  4,  4,  4, 16, 16, 16, 16,  4, 16,  4, 16,
        16, 16,  4,  4,  4,  4,  4,  4,  4,  4, 16, 16,  4,  4,  4,  4,  4,  4,
        16,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
        16,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4, 16, 16,  4,  4, 16, 16,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,
         4,  4, 16,  4,  4, 16,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 30,  12,   9,   9,  38,   9,  19,  19,   9,   9,   6,   9,  17, 115,
          9,  19,  17,   9,   9,   9,   9,   9,   9,  17,   9,   9,   9,   9,
         19,  17,  19,  30,   4,  19,   9,  19,  30,  30,   9,   9,   6,   9,
          9,   9,   9,   9,  30,  30,   9,  38,   9,   9,  19,   9,  17,   9,
        115,   9,   9,   9,   9,  38,   

the top1 is:  tensor([16,  4, 16,  4, 16, 16, 16,  4,  4,  4, 16,  4,  4,  4, 16, 16,  4, 16,
         4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4, 16, 16,  4,  4,  4,  4,
        16,  4,  4,  4,  4,  4,  4,  4, 16,  4, 16,  4, 16,  4,  4,  4,  4,  4,
        16,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4, 16,  4,  4, 16, 16,  4,  4,  4, 16,  4, 16,  4,  4,
         4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,
         4,  4,  4,  4,  4, 16, 16,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,
        16,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([30,  9, 19,  9, 19, 19, 30,  9,  9,  9, 19,  9,  9,  9, 30, 19,  9, 19,
         9,  9,  9,  9,  9,  9,  9,  9, 19,  9,  9,  9, 19, 19,  9,  9, 38,  9,
        19, 38,  9,  9,  9,  9,  9,  9, 30,  9, 19,  9, 19,  9,  9,  9,  9,  9,
        19,  9,  9,  9,  9,  9,  9, 19,  9,  9,  9,  9, 30,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9, 19, 

the top1 is:  tensor([ 4,  4,  4, 16,  4,  4,  4,  4, 16,  4,  4, 16,  4,  4, 16,  4,  4,  4,
         4, 16, 16,  4,  4, 16,  4,  4,  4, 16, 16,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,
        16,  4, 16, 16,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,
        16,  4,  4, 16,  4,  4, 16,  4,  4,  4,  4, 16,  4,  4, 16, 16,  4,  4,
        16,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4, 16,
        16,  4,  4, 16,  4,  4,  4, 16,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([  9,   9,   9,  19,   9,   9,   9,   9,  17,   9,   6,  19,   9, 233,
         19,   9,   6,   9,   9,  19,  19,   6,   9,  19,   9,   4,   9,  30,
         19,   9,   9,  35,   9,   9,   9,   9,   9,   9,   9,   9,   9,   9,
          9,  19,   6,   9,   4,   9,   9,   9,   9,   6,   6,   9,  19,   9,
         19,  35,   9,   9,   9,   9,   

the top1 is:  tensor([16, 16,  4,  4,  4,  4,  4, 16, 16,  4, 16,  4,  4, 16,  4, 16,  4,  4,
         4, 16,  4,  4,  4,  4,  4, 16, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4, 16,  4,  4, 16, 16,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16, 16,
         4,  4, 16,  4,  4,  4, 16,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,
        16,  4, 16,  4,  4,  4,  4, 16,  4,  4, 16,  4,  4,  4,  4,  4, 16, 16,
         4,  4, 16,  4,  4,  4,  4, 16,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4,
         4,  4,  4,  4, 16, 16,  4,  4, 16,  4, 16, 16,  4, 16,  4,  4, 16,  4,
        16,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([30, 19,  9,  9, 38,  9,  9, 30, 30,  9, 19,  9,  9, 19,  9, 19,  9,  9,
         9, 19,  9,  9,  9,  9,  9, 19, 19,  9, 38,  9,  9,  9,  9,  9,  9,  9,
         9,  9, 19,  9,  9, 19, 19,  9,  9,  9, 19,  9,  9,  9,  9,  9, 19, 30,
         9,  9, 19,  9,  9,  9, 30,  9,  9,  9, 19,  9,  9,  9,  9,  9,  9,  9,
        19,  9, 19,  9,  9,  9, 

the top1 is:  tensor([ 4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4, 16,  4,  4,  4, 16,  4,
         4,  4,  4, 16, 16,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4, 16,  4, 16, 16,  4, 16, 16,  4,  4, 16, 16,  4, 16,  4,  4,  4,
        16,  4,  4,  4, 16,  4, 16,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4, 16,  4, 16,  4, 16,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,
        16,  4,  4, 16, 16,  4, 16,  4,  4,  4,  4, 16, 16,  4,  4,  4,  4,  4,
        16, 16,  4, 16, 16,  4,  4, 16,  4, 16, 16,  4,  4,  4,  4,  4,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([  9,   9,   9,   9,   9, 233,   9,  19,   9,   6,   4, 233,  38,   9,
          9,   9,  30,   9,   9,   9,   9,  19,  19,   9,  17,  19,   6,   9,
          9,   9,   9,   9,   9,   9,   9,   9,  19,   9,  17,   9,  19,  19,
        233,   9,  19,   9,   9,  19,  19,   9,  19,   9,   9,   9,  19,   9,
          9,  12,  19,   9,  19,   9,  1

the top1 is:  tensor([16,  4,  4, 16,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16, 16,  4, 16,  4,  4,  4,
         4,  4,  4,  4, 16,  4,  4, 16,  4, 16,  4,  4,  4,  4,  4,  4, 16,  4,
        16,  4, 16,  4, 16, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16, 16,
         4,  4,  4, 16,  4,  4, 16, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4, 16,  4, 16,  4, 16,  4, 16, 16, 16,  4,  4, 16,  4,  4,
         4, 16,  4,  4,  4,  4, 16,  4,  4, 16, 16,  4,  4,  4,  4,  4,  4, 16,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 19,  38,   9,  19,   9,   9,  19,   9,   9,   9,   9,   9,   9,   9,
          9,   9,   9,   9,  38,   9,   9,   9,   9,   9,   9,   9,   9,   9,
          9,  19,  19,   9,  30,   9,   9,   9,   9,   9,   9,  38,  19,   9,
          9,  19,   9,  19,   9,  38,   9,   9,   9,   9,  19,   9,  19,   9,
         19,   9,  19,  19,   9,   9,   

the top1 is:  tensor([ 4,  4, 16,  4,  4, 16,  4, 16,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,
        16, 16, 16, 16,  4,  4,  4,  4,  4,  4, 16,  4, 16, 16,  4,  4,  4,  4,
        16,  4,  4, 16,  4, 16, 16, 16, 16,  4,  4,  4, 16,  4,  4,  4, 16, 16,
         4,  4, 16,  4,  4,  4, 16, 16,  4,  4,  4,  4, 16,  4,  4,  4, 16,  4,
        16,  4,  4,  4, 16, 16, 16, 16,  4,  4,  4,  4,  4,  4, 16,  4,  4, 16,
        16, 16,  4,  4,  4, 16,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4, 16,  4,  4, 16,  4,  4,  4,  4, 16,  4, 16,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9,  9, 19,  9,  9, 19,  9, 19,  9,  9,  9,  9, 19,  9,  9,  9,  9,  9,
        19, 19, 19, 19, 38,  9,  9,  9,  9,  9, 19,  9, 19, 19,  9,  9,  9,  9,
        19,  9,  9, 19,  9, 19, 19, 19, 19,  9,  9, 38, 19,  9,  9,  9, 19, 19,
         9,  9, 19,  9,  9,  9, 19, 19,  9,  9,  9,  9, 19,  9,  9,  9, 19,  9,
        19,  9,  9,  9, 19, 19, 

the top1 is:  tensor([ 4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16, 16,  4,
         4,  4, 16,  4,  4,  4,  4,  4,  4, 16,  4, 16,  4, 16,  4,  4, 16, 16,
        16,  4,  4,  4,  4,  4, 16, 16,  4,  4,  4,  4,  4, 16,  4, 16,  4,  4,
        16, 16, 16,  4,  4, 16,  4, 16,  4,  4, 16,  4,  4, 16,  4,  4,  4,  4,
         4,  4,  4,  4,  4, 16,  4,  4, 16, 16,  4,  4, 16,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9,  9,  9,  9,  9,  9,  9,  9, 19, 38,  9,  9,  9,  9, 38,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9, 19, 19,  9,
         9,  9, 19,  9, 38,  9,  9,  9,  9, 19,  9, 19,  9, 19, 38,  9, 19, 19,
        19,  9,  9,  9,  9,  9, 19, 19,  9,  9,  9,  9,  9, 19,  9, 19,  9,  9,
        19, 19, 19,  9,  9, 19, 

the top1 is:  tensor([16,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4,  4, 16,  4,  4,  4,
        16,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4, 16,  4,  4,  4, 16,  4,
        16,  4, 16,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4, 16, 16,  4, 16,  4, 16,  4, 16,  4,  4,  4, 16,  4, 16,  4,
        16,  4, 16, 16,  4,  4, 16,  4,  4,  4,  4,  4,  4, 16,  4, 16, 16,  4,
         4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4, 16,  4,  4,  4,  4,
         4,  4,  4,  4, 16,  4, 16,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([19,  9,  9,  9,  9, 19,  9,  9,  9,  9,  9, 19,  9,  9, 19,  9,  9,  9,
        19,  9,  9,  9, 19,  9,  9,  9,  9,  9, 19,  9, 19,  9, 38, 38, 19,  9,
        19,  9, 19,  9,  9, 38,  9,  9, 38, 19,  9,  9,  9,  9,  9, 38,  9,  9,
         9,  9,  9, 19, 19, 38, 19,  9, 19,  9, 19,  9, 38,  9, 19,  9, 19,  9,
        19,  9, 19, 19,  9,  9, 

the top1 is:  tensor([ 4,  4,  4,  4,  4, 16,  4, 16,  4, 16,  4,  4, 16,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4, 16, 16, 16, 16,  4,  4,  4, 16, 16,  4,  4,  4,
         4,  4,  4, 16,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4,
         4, 16,  4,  4,  4,  4,  4, 16,  4,  4,  4, 16,  4,  4, 16,  4,  4,  4,
         4,  4,  4,  4,  4, 16, 16,  4,  4,  4,  4,  4, 16, 16,  4,  4,  4, 16,
         4, 16,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4, 16,  4,  4, 16,  4, 16,
         4, 16,  4,  4,  4,  4, 16,  4, 16,  4,  4, 16,  4,  4,  4, 16,  4,  4,
        16,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([  9, 115,   9,   9,   9,  19,   9,  19,   9,  19,   9,   9,  19,   9,
          9,   9,   9,  38,   9,   6,  38,   9,  35,   9,  19,  19,  19,  19,
          9,   9,  38,  19,  19,   9, 115,   9,  38,  38,  38,  19,   9,   9,
          9,   9,  17,   9,  38,   9,   9,   9,   9,  38,  19,   9, 115,  19,
         19,  38,   9,   9,   9,  19,   

the top1 is:  tensor([ 4, 16,  4,  4,  4,  4, 16, 16,  4, 16, 16,  4,  4,  4, 16, 16, 16,  4,
         4, 16,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4, 16,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4, 16, 16, 16,  4,  4,  4, 16,  4, 16, 16,
         4,  4,  4,  4, 16,  4, 16,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4, 16, 16,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,
         4,  4,  4, 16,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,
        16,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4, 16, 16, 16,  4,  4,
        16,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9, 19,  9, 38,  9, 38, 19, 19,  9, 30, 19,  9, 38, 38, 30, 19, 30,  9,
        38, 19, 38, 38,  9, 38,  9,  9, 19, 38,  9,  9,  9, 30,  9, 38,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9, 30, 30, 19,  9, 38,  9, 30,  9, 19, 19,
         9, 38,  9, 38, 30, 38, 19,  9, 19,  9,  9, 38, 38,  9,  9,  9,  9,  9,
         9,  9, 19, 30,  9,  9, 

the top1 is:  tensor([16, 16, 16, 16,  4,  4,  4, 16,  4,  4,  4, 16,  4,  4, 16,  4,  4, 16,
         4,  4,  4,  4,  4,  4, 16,  4, 16,  4,  4,  4,  4,  4,  4, 16,  4,  4,
        16,  4,  4, 16,  4, 16,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16,
         4, 16,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4, 16,  4, 16, 16,  4,
        16,  4, 16,  4, 16,  4, 16,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,
        16,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4, 16, 16,  4,  4,  4,  4,  4,
         4,  4, 16,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4, 16, 16,  4, 16,  4,
        16,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([30, 30, 30, 30,  9,  9,  9, 30,  9,  9, 38, 30, 38,  9, 30, 38,  9, 30,
         9,  9, 38, 38,  9, 38, 30,  9, 30,  9,  9, 38,  9, 38,  9, 30,  9,  9,
        30,  9, 38, 30,  9, 30,  9,  9,  9,  9,  9, 30,  9,  9,  9, 38,  9, 30,
         9, 30,  9,  9, 38, 30,  9,  9,  9,  9,  9, 30,  9, 30,  9, 30, 30,  9,
        30,  9, 30,  9, 30,  9, 

the top1 is:  tensor([ 4,  4,  4,  4,  4,  4,  4, 16, 16,  4,  4, 16,  4,  4,  4,  4, 16,  4,
         4, 16,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4,  4, 16, 16,  4,  4,  4,
         4, 16,  4,  4,  4, 16,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4, 16,
        16,  4,  4,  4,  4,  4, 16,  4,  4,  4, 16, 16,  4, 16, 16, 16, 16,  4,
         4,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4,  4, 16,  4, 16, 16,  4,  4,
         4,  4,  4, 16,  4, 16, 16, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4, 16, 16,  4,  4,  4, 16, 16, 16,  4, 16,  4,  4,  4,  4, 16,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([  9,   9,  24,   6,  19,   9,   6,  30,  19,   9,   9,  30,   9,   9,
          9,   9,  30,   9,   9,  30,  38,   9,  30,   9,  17,   9,   9,   9,
         30,   9,   9,  30,  19,   9,  38,   9,  38,  30,   9,   9,   9,  17,
          9,   9,  30,   9,  38,   9,   9,   9,   9,   9,  38,  30,  30,   9,
          9,  19,   9,  38,  19,   6,   

the top1 is:  tensor([16,  4,  4,  4,  4,  4, 16,  4, 16,  4,  4,  4,  4, 16,  4, 16,  4,  4,
         4, 16,  4, 16,  4,  4, 16,  4,  4, 16,  4,  4,  4,  4, 16, 16,  4,  4,
        16, 16,  4,  4,  4, 16, 16,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,
         4,  4, 16,  4, 16,  4,  4,  4, 16,  4, 16,  4,  4, 16,  4,  4,  4, 16,
         4, 16,  4,  4,  4,  4,  4, 16, 16, 16, 16,  4,  4, 16,  4, 16,  4,  4,
        16,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,
        16,  4,  4,  4,  4,  4,  4, 16,  4, 16,  4,  4,  4,  4,  4,  4,  4, 16,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 30,   9,  17,  38,  38,   9,  30,  38,  30,  38,   9,  38,   9,  30,
          9,  17,   9,   9,   9,  19,   9,  30,  38,  38,  30,   9,   9,  30,
         38,   9,   9,  38,  30,  30,   9,  38,  30,  30,   9,  38,   9,  17,
         30,   6,   6, 115,   9,  17,   9,   9,   9,  17,  38,  38,   9,   9,
         30,  19,  30,   9,   9,   9,  2

the top1 is:  tensor([ 4, 16,  4,  4,  4,  4,  4,  4, 16,  4, 16,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,
        16, 16,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4,  4,  4, 16,  4,  4,
        16, 16,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4, 16,  4,  4,  4, 16,
         4,  4,  4,  4,  4,  4,  4,  4, 16, 16, 16,  4,  4,  4,  4,  4, 16,  4,
         4,  4,  4,  4, 16,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4, 16,  4,
        16,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,
        16, 16], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([  9,  30,  17,   9,   9,   9,  30,   9,  24,   9,  30,   9,   9,   9,
        115,   9,   9,  38,   9,   9,  38,  38,   9,  38,   6,   9, 115,   9,
          9,  38,   9,  30,   9,   9,   9,   6,  17,  30,  17,   9,   9,  30,
         38,  30,  38,   9,   9,  17,   4,   9,   4,  30,   9,   9,  30,  38,
          9,  38,   9,  24,   9,   9,   

the top1 is:  tensor([ 9, 38, 30, 30, 38,  9,  9,  9,  9, 30,  9,  9,  9,  9, 24,  9, 30,  9,
        30, 38,  9,  9, 30, 30, 30,  9,  9,  9,  9, 30, 24,  9,  9, 38,  9,  9,
         9,  9, 24,  9,  9, 38, 30,  9,  9, 30, 38,  9, 38, 38,  9,  9, 30, 24,
         9, 30, 38, 30, 38,  9,  9,  9,  9,  9,  9, 30,  9,  9, 38,  9,  9,  9,
        38, 24, 30, 30,  9,  9,  9, 38, 38,  9,  9,  9,  9, 30, 30,  9, 38,  9,
         9, 30, 30, 38,  9, 38,  9,  9,  9, 38, 30,  9,  9,  9,  9,  9,  9,  9,
         9, 30,  9, 38, 38,  9, 38,  9,  9, 38, 38,  9, 38, 30,  9,  9, 24,  9,
         9, 38], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 6, 12, 17, 17, 12,  6,  6,  6,  6, 17,  6,  6,  6,  6,  9,  6,  6,  6,
        17, 12,  6,  6, 17,  6, 17,  6,  6,  6,  6, 17,  9,  6,  6, 12,  6,  6,
         6,  6,  9,  6,  6, 12, 17,  6,  6, 17, 12,  6, 12, 12,  6,  6, 17, 33,
         6,  6, 12, 17, 12,  6,  6,  6,  6,  6,  6, 17,  6,  6, 12,  6,  6,  6,
        12,  9, 17, 17,  6,  6, 

the top1 is:  tensor([ 4,  4,  4,  4,  4, 16,  4,  4, 16,  4,  4, 16,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4, 16,  4,  4,  4,  4,  4, 16,  4,  4, 16,  4, 16,  4,  4,  4,  4, 16,
         4, 16,  4,  4,  4,  4, 16,  4,  4, 16,  4, 16,  4,  4,  4,  4,  4, 16,
         4, 16, 16,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4, 16, 16,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16,
         4,  4, 16, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([38,  9,  9, 38, 38, 30, 38, 38, 30,  9,  9, 30,  9,  9, 24,  9,  9,  9,
         9, 38,  9,  9,  9,  9,  9, 38, 30, 38,  9, 24,  9,  9,  9,  9,  9,  9,
         9, 30,  9,  9,  9,  9,  9, 30, 38,  9, 30, 38, 30, 38, 38,  9, 24, 30,
        38, 30,  9,  9,  9,  9, 30,  9, 38, 30, 24, 30,  9,  9,  9, 38,  9, 30,
        38, 30, 30,  9,  9, 30, 

the top1 is:  tensor([ 4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16, 16,  4,  4,  4,  4,
         4,  4, 16,  4,  4, 16, 16, 16,  4,  4,  4,  4, 16,  4, 16, 16,  4, 16,
         4,  4,  4,  4,  4, 16, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4, 16, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4, 16,  4, 16,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4, 16,  4, 16,  4, 16,  4, 16,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,
        16,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9, 38,  9,  9,  9, 38, 30,  9,  9,  9, 38,  9, 30, 30, 24,  9,  9,  9,
         9,  9, 30,  9,  9, 30, 30, 30,  9,  9, 38,  9, 30,  9, 30, 30,  9, 30,
         9,  9,  9,  9,  9, 30, 30,  9,  9,  9, 38,  9,  9,  9,  9,  9,  9, 38,
        24, 30, 30,  9,  9,  9,  9,  9,  9,  9, 38,  9,  9,  9,  9, 38, 24,  9,
         9, 30,  9, 30, 38, 38, 

the top1 is:  tensor([ 4, 16,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4, 16,
         4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16,
         4,  4, 16,  4,  4,  4,  4, 16, 16,  4,  4, 16,  4,  4,  4, 16,  4,  4,
         4,  4, 16, 16, 16,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4, 16,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([  9,  30,   9,   6,   9,   9,   9,   9,   9,  30,   9,   9,   9,   9,
          9,  30,   9,   9,   9,   6,   9,   9,   9,   9,   9,  24,  38,   9,
          9,   9,  35,  30,   9,   9,   9,  30,   9,  17,  38,   9,   9,   9,
          9,  30,   9,   9,   9,  30,  24,   9,   9,   9,   9,  30,   9,   9,
         30,  38,   9,  17,   9,  38,  3

the top1 is:  tensor([16, 16,  4, 16, 16,  4,  4, 16,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16,
         4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4, 16, 16,  4,  4,  4,  4,  4,
         4, 16, 16, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16, 16, 16,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4, 16,  4,  4,  4,  4,
         4, 16,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,
         4, 16,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,
         4,  4, 16,  4,  4, 16,  4, 16,  4, 16,  4,  4,  4, 16, 16,  4, 16,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 30,  30,  10,  17,  30, 115,  30,  30,   9,   9,   9,  30,   9,  24,
          9,   9,   9,  30,   9,   9,   9,  38,   9,  30,  30,   9,   9,   9,
          9,  17,  30,   9,  24,  19,   9,   9, 115,  30,  30,  30,  38,   9,
          9,   9,   9,   9,   9,   9,   9,   9,   9,  38,  30,  17,   4, 115,
          9,   9,   9,   9,  17,   9,  3

the top1 is:  tensor([ 4, 16, 16,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4, 16, 16, 16, 16,  4,  4, 16,  4,  4, 16, 16, 16,  4, 16,  4, 16,  4,
         4,  4,  4, 16, 16,  4,  4,  4,  4,  4,  4, 16, 16,  4,  4,  4,  4,  4,
         4, 16,  4,  4,  4,  4,  4, 16, 16, 16,  4,  4,  4,  4,  4,  4, 16,  4,
         4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4,  4, 16,  4, 16,
        16,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4, 16, 16,  4,  4,  4, 16, 16, 16,  4, 16,  4,  4,  4, 16,  4,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 38,  17,  24,   9,   6,   9,  17,   9,   9,  24,   9,   9,   9,   9,
         38,   9,   9,   9,   9,  30,  17,  30,  17,   9,   9,  30,  24,   9,
         30,  30,  30,   9,  30,   9,  17,   9,  10,   9,   9,  30,  30,   9,
         38,   6,   9,   9,   9,  30,  30,   9,   9,  24,   9,   9,   9,  17,
          9,  17,  24, 115,   9,  30,  3

the top1 is:  tensor([ 4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4, 16,  4,
         4, 16, 16,  4,  4,  4,  4, 16,  4,  4,  4,  4, 16,  4,  4, 16, 16, 16,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,
         4,  4,  4, 16, 16, 16,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,
        16,  4, 16, 16,  4,  4,  4, 16,  4, 16,  4,  4, 16,  4,  4, 16,  4,  4,
         4, 16,  4,  4, 16,  4, 16,  4, 16,  4, 16,  4,  4, 16, 16,  4,  4,  4,
         4,  4,  4, 16,  4, 16, 16,  4,  4, 16,  4,  4, 16, 16,  4,  4, 16,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([  9,   9,   9,  38,   9,   9,   9,   9,   9,  50,   9, 115,   9,   9,
         17,  24,  63,   9,  24,  30,  30,   9,   9,   9,   9,  17,   9,   9,
          9,   9,  30,   9,  35,  30,  30,  30,   9,   9,   9,   9,   9,  24,
          9,   9,   9,   9,   9,  35,  17,   9,   9,   9,   9,   9,   9,   9,
          9,  30,  30,  17,   9,   9,   

the top1 is:  tensor([ 4,  4,  4,  4,  4, 16, 16,  4,  4, 16, 16, 16,  4, 16,  4, 16,  4, 16,
         4, 16,  4,  4,  4,  4, 16, 16,  4, 16,  4, 16, 16,  4,  4,  4, 16,  4,
         4, 16,  4,  4,  4,  4, 16, 16, 16, 16,  4, 16,  4, 16,  4,  4,  4,  4,
         4, 16,  4,  4,  4,  4, 16, 16,  4,  4, 16,  4,  4, 16,  4,  4, 16,  4,
        16,  4,  4,  4, 16, 16, 16, 16,  4,  4,  4, 16,  4,  4, 16, 16,  4, 16,
        16,  4, 16,  4,  4,  4,  4, 16,  4,  4,  4,  4, 16,  4,  4, 16,  4,  4,
         4,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4,  4, 16,  4,  4,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([  9,   9,   9,  24,   9,  30,  30,   9,  38,  30,  38,  50,   9,  30,
         35,  30,   9,  24,   9,  24,   9,   9,   6,   9,  30,  30,  24,  30,
         24,  17,  30,   9,  24,   9,  30,   9,   9,  30,   9,   9,   9,   9,
         30,  24,  30,  38,   6,  30,   9,  30, 115,   9,   9,   9,   9,  17,
          6, 115,   9,   9,  17,  17,   

the top1 is:  tensor([ 4,  4,  4,  4,  4, 16,  4, 16,  4,  4,  4, 16,  4, 16, 16, 16, 16, 16,
         4, 16,  4,  4,  4, 16,  4, 16,  4,  4,  4, 16, 16,  4,  4, 16, 16,  4,
         4,  4, 16,  4, 16,  4,  4, 16,  4,  4,  4, 16, 16, 16,  4, 16,  4,  4,
         4,  4,  4,  4,  4,  4, 16,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4, 16,  4,  4,  4,  4, 16,  4,  4,  4,  4, 16,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4, 16,  4, 16, 16, 16, 16, 16,
        16,  4,  4, 16,  4,  4,  4,  4,  4,  4, 16, 16,  4,  4,  4, 16,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9,  9,  9,  9,  9, 30, 38, 30,  9,  9,  9, 30,  9, 30, 30, 30, 30, 30,
         9, 30,  9, 24,  9, 30,  9, 30, 24,  9,  9, 30, 30,  9,  9, 30, 30,  9,
         9,  9, 30,  9, 30,  9,  9, 30,  9,  9, 24, 30, 30, 30,  9, 30,  9, 24,
         9,  9, 24,  9, 24,  9, 30,  9,  9, 30,  9,  9,  9,  9,  9,  9,  9,  9,
         9, 24,  9, 30,  9,  9, 

the top1 is:  tensor([ 4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4, 16,  4, 16, 16, 16,  4,
        16,  4,  4, 16,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4, 16,  4,
         4,  4, 16,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,
        16, 16,  4, 16,  4,  4, 16,  4,  4, 16,  4,  4, 16,  4,  4,  4,  4, 16,
        16,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,
         4,  4,  4,  4,  4, 16, 16, 16,  4, 16,  4,  4, 16,  4,  4,  4,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9, 38, 38,  9,  9,  9, 38,  9, 24,  9, 30,  9, 30,  9, 30, 30, 30,  9,
        30, 38,  9, 30,  9, 24,  9,  9, 30,  9,  9,  9,  9,  9,  9,  9, 30,  9,
         9, 38, 30,  9,  9,  9,  9,  9,  9, 38, 30, 38,  9,  9,  9,  9, 38,  9,
        30, 30,  9, 30,  9,  9, 30,  9,  9, 30,  9,  9, 30,  9,  9, 38, 24, 30,
        30,  9, 24,  9, 30, 24, 

the top1 is:  tensor([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([ 4, 16,  4, 16, 16,  4,  4,  4, 16,  4, 16,  4,  4,  4,  4,  4,  4, 16,
         4,  4,  4, 16,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,
        16,  4, 16,  4,  4,  4,  4,  4, 16, 16,  4,  4,  4,  4,  4, 16,  4,  4,
        16,  4, 16,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,
        16,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4, 16,  4, 16, 16,  4,  4,
         4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4, 16,  4,  4,  4,  4, 16, 16,  4, 16,  4,  4,  4,  4,  4,  4,
         4, 16], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9, 30, 38, 30, 30,  9, 38,  9, 30,  9, 30,  9,  9, 24,  9,  9, 24, 30,
         9,  9,  9, 30,  9,  9, 30, 24,  9,  9,  9,  9, 30,  9,  9,  9, 38,  9,
        30, 24, 30,  9,  9,  9,  9,  9, 30, 30, 38, 38, 24,  9,  9, 30, 38,  9,
        30, 38, 30,  9,  9, 30,  9,  9,  9,  9,  9, 24, 24,  9,  9, 30,  9,  9,
        30,  9,  9,  9,  9,  9, 

the top1 is:  tensor([ 4,  4, 16,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4, 16,  4, 16,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4, 16,  4, 16, 16,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4, 16,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,
         4, 16], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([  9,  38,  19,   9,   9,   9,   9,  30,   9,  35,   6,   9,   9,   9,
          9,   9,  38, 115,   9,  30,   9,  30,  19,   9,  24,  38,  17,   9,
         24,  35,   9,   9,   9,   9,   9,   9,   9,   9,  19,   9, 112,   9,
          9,   9,  30,  38,   9,   9,   9,   9,  19,   9,   9,   6,   9,   9,
         19,  35,  30,  19,   9,   9,   

the top1 is:  tensor([16, 16, 16,  4, 16,  4,  4, 16,  4,  4,  4, 16,  4,  4, 16,  4,  4, 16,
        16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4, 16,  4,
         4,  4,  4, 16, 16,  4,  4,  4,  4,  4, 16,  4, 16, 16,  4,  4,  4, 16,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4,
         4,  4,  4,  4, 16,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,
         4, 16,  4,  4,  4,  4, 16,  4,  4, 16,  4,  4,  4,  4, 19,  4,  4,  4,
         4,  4,  4,  4,  4,  4, 16, 16,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,
         4, 16], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([19, 19, 30, 38, 30,  9,  9, 30,  9,  9,  9, 30,  9,  9, 30, 38, 38, 30,
        19,  9, 38,  9,  9,  9,  9,  9,  9,  9, 38,  9,  9,  9, 19, 38, 30,  9,
         9,  9, 24, 19, 19, 38,  9, 38,  9,  9, 30,  9, 19, 30,  9, 35,  9, 30,
         9,  9, 38, 24,  9, 38,  9,  9,  9,  9, 19,  9,  9,  9, 38,  9, 30,  9,
         9, 38,  9,  9, 19,  9, 

the top1 is:  tensor([ 4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4, 16,  4, 16, 16, 16,  4,  4,  4,  4,  4,  4,
         4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,
         4,  4,  4, 16,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 14,  30,   9,   9,   9,   9,   9,   9,   6,   9,  38,  24,  14,   9,
         19,   9,  24,  14,  24,   9,   9,   9,  38,  14,   9,   9,  19,  19,
          9,   9,   9,   9,  19,   6,   9,   9,   9,   9,  24,  38,  17,   6,
         38,  19,   9,  30,  30,  30,   9,  12,  19,  38,   9,   9,  38,  24,
         30,  19,  38,   9,   9,  38,  3

the top1 is:  tensor([ 4, 16, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4, 16,
         4, 16,  4, 16,  4, 16,  4, 16, 16,  4,  4,  4,  4, 16,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4, 16,  4,
        16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,
         4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4, 16, 16,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4, 16,  4,  4, 16,  4, 16,  4, 16, 16,  4,  4,  4, 16,  4,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9, 24, 30,  9, 14,  9, 24, 14,  9,  9,  9, 24, 38, 30,  9, 38, 24, 30,
         9, 30, 24, 30, 38, 30,  9, 30, 30, 24, 24,  9,  9, 30,  9,  9,  9,  9,
         9, 38, 38, 38,  9,  9,  9,  9,  9, 38,  9,  9,  9,  9, 30,  9, 30, 14,
        30, 35, 14,  9,  9,  9,  9, 24,  9,  9, 38,  9,  9,  9, 30,  9,  9,  9,
        38,  9,  9,  9, 38,  9, 

the top1 is:  tensor([ 4,  4,  4, 16,  4, 16,  4,  4,  4,  4,  4, 16,  4,  4, 16,  4,  4, 16,
         4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
        16,  4,  4,  4, 16, 16, 16,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,
         4,  4, 16,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4, 16,  4,  4,  4,
         4,  4,  4, 16,  4,  4, 16,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,
        16, 16, 16,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,
         4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4, 16, 16,  4,  4,  4, 16,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9, 38,  9, 30,  9, 30, 38, 24,  9,  9, 38, 30,  9,  9, 30,  9, 38, 30,
        14, 30,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9, 24,  9,  9,
        30,  9, 14,  9, 30, 19, 30, 14,  9, 14,  9, 24,  9, 30, 14,  9,  9,  9,
         9,  9, 30,  9,  9, 38, 30,  9, 38,  9,  9,  9, 19,  9, 30,  9, 24,  9,
         9,  9,  9, 30,  9,  9, 

the top1 is:  tensor([16, 16,  4, 16,  4,  4,  4,  4, 16, 16,  4, 16,  4, 16,  4,  4,  4, 16,
        16,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4, 16, 16, 16,
         4, 16,  4,  4,  4, 16,  4,  4, 16,  4,  4, 16,  4,  4,  4,  4,  4, 16,
         4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4, 16,  4, 16, 16,  4,  4, 16,  4,  4,  4,  4,  4,
         4, 16,  4,  4,  4, 16,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,
         4, 16], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 19,  30,   9,  30,   9,   9,   6,   9,  17,  30,  38,  30,   9,  30,
         35,  38,   9,  19,  19,   9,   9,  38,   9,  12,  14,  30,   9,   9,
          9,   9,   9,  17,   6,  19,  19,  17,   9,  19, 115,   9,  38,  19,
         38,  38,   6,   9,   9,  38,  38,  14,  38,   9,   9,  17,   6,   9,
         38,   9,   9,   9,  30,   9,   

the top1 is:  tensor([ 4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4, 16,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4, 16, 16, 16,  4,  4,  4,  4,  4,  4,  4, 16,  4,
         4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,
        16, 16,  4, 16,  4,  4,  4,  4,  4, 16,  4,  4, 16,  4, 16,  4, 16, 16,
         4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,
        16,  4,  4,  4,  4,  4,  4, 16,  4,  4, 16,  4,  4,  4,  4, 16,  4,  4,
        16,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([  9,  38,   9,   9,   9,   9,   9,  38,  30,   6,   9,   9,  38,  30,
          6,  38,  17,   9,   9,  24,   9,   9,  35,  38,   9,   9,   9,   9,
         19,   9,   9,   9,  38,  17,  14,   9,  38,   9,   9,   9,   9,   9,
         30,  19,  19,   9,   9,   9,   9,   9,  14,   9,  30,  19,   9,   6,
         38,   0,   9,   9,   9, 115,   

the top1 is:  tensor([16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4, 16,
        16, 16, 16,  4, 16,  4,  4,  4,  4,  4,  4, 16,  4,  4, 16,  4,  4,  4,
         4, 16,  4, 16,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,
        16,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4, 16,  4, 16, 16,  4,
         4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4, 16,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([30,  9,  9,  9,  9,  9, 14,  9,  9,  9, 38, 38, 38,  9, 30,  9,  9, 30,
        30, 30, 30, 38, 30,  9, 38, 38,  9,  9,  9, 30, 38,  9, 30,  9,  9, 38,
        38, 30,  9, 30, 38, 38, 38, 30,  9,  9,  9, 38, 38, 38, 38, 30, 38,  9,
        30,  9,  9,  9,  9,  9, 14,  9, 30, 38,  9,  9,  9,  9,  9, 38,  9, 38,
         9,  9, 14, 38,  9,  9, 

the top1 is:  tensor([ 4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4, 16, 16,
         4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4, 16, 16,  4,  4,  4, 16,  4,
         4,  4,  4,  4, 16,  4, 16,  4, 16,  4,  4,  4,  4,  4,  4,  4, 16,  4,
         4, 16,  4,  4, 16,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,
        16,  4, 16, 16,  4,  4,  4,  4, 16, 16,  4, 16,  4,  4,  4, 16,  4,  4,
         4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,
         4,  4,  4, 16,  4,  4,  4, 16,  4, 16,  4, 16,  4,  4,  4, 16, 16, 16,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([38,  9,  9,  9,  9,  9,  9,  9, 38, 38,  9,  9, 30,  9, 38,  9, 30, 30,
         9, 38, 30, 38,  9,  9,  9,  9,  9,  9,  9, 30, 30,  9, 38,  9, 30,  9,
         9,  9,  9, 14, 30, 38, 30, 38, 30,  9, 38, 38,  9, 38,  9,  9, 30,  9,
         9, 30, 38,  9, 30,  9, 30,  9,  9, 38, 38,  9,  9,  9,  9,  9, 30,  9,
        30,  9, 30, 30, 38,  9, 

the top1 is:  tensor([16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,
         4, 16,  4,  4,  4,  4, 16,  4,  4, 16,  4,  4,  4,  4, 16,  4,  4,  4,
         4,  4,  4,  4, 16,  4,  4,  4, 16, 16,  4,  4, 16, 16,  4, 16,  4, 16,
         4,  4, 16,  4,  4, 16, 16,  4,  4, 16, 16,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16, 16,
        16,  4,  4, 16,  4,  4, 16,  4, 16, 16,  4, 16,  4,  4,  4,  4, 16,  4,
         4, 16, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 30,   9,  38,  38,   9,  17,  38,   9,   9,   9,   9,  38, 115,  30,
         38,   9,   9,   9,   9,  30,   9,   9,   9,   9,  17,   9,  38,  30,
         38,   9,  38,   9,   6,  38,   9,   9,  38,  17,   9,   9,  17,   6,
          9,   9,  38,  30,  14,   9,  38,  30,   9,  30,   9,  30,  38,   4,
         38,  38, 115,  38,  30,   9,   

the top1 is:  tensor([16,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,
         4,  4,  4, 16,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4, 16, 16,
        16,  4,  4, 16, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
        16, 16,  4,  4,  4, 16, 16,  4, 16,  4, 16,  4,  4,  4,  4,  4, 16, 16,
        16, 16, 16,  4,  4,  4,  4,  4,  4, 16,  4,  4, 16,  4,  4, 16,  4,  4,
         4, 16,  4,  4, 16,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16, 16,  4,
        16,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 30,   9,   9, 115,   9,  30,   9,   9,   9,   9,   9,  19,   9, 115,
         38,   9,  17,  38,  38,   9,   9,  38,   9,   9,   9,   9,  17,  17,
          9,  38,   6,  38,   9,   9,  17,  38,   9,   9,   9,  38,   9,   9,
          9,   9,  38,  38,  38,   9,   9,  38,   6,   6,  30,   4,  38,   9,
          9,  30,  38,  35,   9,   9,  1

the top1 is:  tensor([ 4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4, 16,  4,  4, 16,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,
         4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
        16,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,
         4, 16, 16,  4,  4, 16,  4,  4,  4, 16,  4, 16,  4,  4,  4,  4,  4,  4,
        16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16, 16,  4,  4, 16,  4,
         4,  4,  4, 16,  4,  4, 16,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,
        16,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 38,   9,   9,  38,  14,   9,   9,  38,   4,   9,   6,   9,   9,   9,
          9,  14,   6,   6,   9,  17,  38,   9,  30,   9,   9,  19,   9,   9,
          9,   9, 115,  30,   9,  38,   6,  38,   6,   9,   6,  17,   9,   9,
          6,   9,  38,   9,  17,   9,  38,   9,  17,  38,   9,  19,  30,  14,
          9,  38,  38,   9,   9,  38,   

the top1 is:  tensor([16,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4, 16,  4,  4,  4,  4, 16,  4,
         4, 16,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4, 16,  4, 16,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,
        16,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,
        16,  4, 16,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4, 16,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4, 16, 16,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,
         4, 16], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 30,   9,   9,  38,   9, 115,  38,  19,   9,  38,  17,  19,  19, 115,
         38,   6,  30,   9,   6,  30,  38,   9,   9,  19,  38,  19, 115,   9,
          9,  38,  19,   9,  19,   9,   9,  38,   9,   6,  38,   9, 115,  38,
          9,   9,   9,   9,   9,  19,   9,   9,   9,  38, 115,  14,  19,   9,
          9,   9,  30,  30, 115,  14,   

the top1 is:  tensor([16,  4, 16,  4,  4, 16, 16,  4,  4,  4,  4,  4, 16,  4, 16,  4,  4,  4,
        16,  4,  4,  4,  4,  4, 16, 16,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,
         4,  4, 16,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,
         4,  4,  4,  4,  4, 16,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4, 16,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4, 16,  4,
         4, 16,  4,  4,  4,  4,  4,  4,  4,  4, 16, 16,  4, 16,  4,  4, 16, 16,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4, 16, 16,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([30,  9, 30, 38,  9, 30, 30,  9,  9, 14,  9,  9, 30,  9, 30, 38, 38,  9,
        30,  9, 38,  9,  9, 38, 19, 30, 38, 38, 38,  9, 38, 19,  9,  9, 14,  9,
        14,  9, 30,  9, 30, 38, 38,  9,  9, 14, 38, 38, 35, 30,  9,  9, 38,  9,
         9, 38,  9, 38, 14, 30,  9, 30,  9,  9,  9,  9,  9,  9,  9,  9, 38,  9,
         9, 30,  9,  9, 30, 38, 

the top1 is:  tensor([ 4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,
        16,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,
         4,  4, 16,  4, 16, 16,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4, 16, 16,
         4,  4,  4,  4,  4, 16,  4,  4,  4,  4, 16, 16,  4,  4,  4,  4,  4, 16,
         4,  4,  4,  4,  4,  4,  4, 16, 16,  4,  4, 16,  4, 16,  4,  4, 16, 16,
         4, 16,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4, 16,  4, 16,  4,  4, 16,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,
         4, 16], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([35, 38,  9,  9, 14, 38,  9, 30,  9, 38,  9, 38, 38, 30, 14, 14, 38, 38,
        30, 38, 38, 14, 30,  9,  9,  9,  9, 38, 30,  9,  9, 38, 14, 38, 14,  9,
        14, 38, 30,  9, 30, 30, 38,  9, 30,  9,  9,  9,  9, 38,  9,  9, 30, 19,
         9,  9, 14, 38,  9, 30, 38, 38,  9,  9, 30, 30,  9, 14,  9,  9, 38, 30,
        14, 14, 38,  9,  9,  9, 

the top1 is:  tensor([ 4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4, 16,  4,  4, 16,  4,
        16,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4, 16, 16,  4,  4,  4,  4, 16,
         4,  4, 16,  4,  4,  4, 16,  4,  4,  4,  4, 16,  4, 16,  4, 16, 16,  4,
        16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4, 16, 16,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4, 16,  4,  4, 16,  4,  4,
         4,  4,  4,  4,  4, 16,  4, 16,  4,  4, 16,  4,  4,  4,  4, 16, 16,  4,
        16,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([  9, 115,  19,   9,   9,   6,  14,  38,  17,   9,   9,   9,  14,  30,
          9,  38,  19,  35,  19,   9,   9,  38,   9,   9,   6,  30,   9,   9,
         17,  19,  38,   9,   9,  14,   9,  19,   6,  14,  19,  38,  14,   9,
         30,   9,   9,  38,  38,  38,   9,  30,   9,  19,  19,   9,  17,   9,
          9,   9,  14,   9,   9,   9,   

the top1 is:  tensor([16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4, 16,
         4,  4,  4,  4,  4,  7,  4,  4, 16,  4,  4,  4,  4, 16,  4, 16,  4,  4,
         4,  4, 16,  4,  4,  4,  4,  4,  4,  4, 16, 16, 16,  4, 16,  4,  4,  4,
         4, 16,  4,  4, 16,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4, 16,
         4,  4,  4,  4, 16,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4, 16,  4,  4,
        19,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 30,   6,   9, 115,  38,  38,  14,   9,   9,   9,   9,   9,   9,   9,
         30,   6,  38,  30,  17,   9,  38,   9,  14,  19,   9,   9,  19,   9,
         19,  17,  38,  30,  38,  30,   9,   9,   9, 115,  19,   4,  38,   9,
          9,   9,  38,   9,  19,  30,  19,  38,   0,  14,   9,   9,   9,  19,
         38,   9,  19,   9,  38,  30,   

the top1 is:  tensor([ 4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4, 16,  4,  4, 16,  4,  4, 16,
        16,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 19,  4,  4, 16,
         4,  4,  4,  4,  4, 16,  4,  4, 16,  4,  4, 19,  4,  4,  4,  4,  4,  4,
        16, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,
         4, 16,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([14,  9,  9,  9,  9,  9,  9,  9, 30, 38, 38, 19,  9,  9, 30,  9,  9, 30,
        19, 38, 19, 38,  9,  9,  9, 38, 14,  9, 38,  9,  9,  9, 17,  9,  9, 19,
         9,  9,  9,  9,  9, 19,  9, 38, 19, 38, 38, 17,  9,  9,  9,  9, 38,  9,
        30, 19,  9,  9, 14,  9, 14, 38,  9,  9,  9,  9,  9, 38,  9,  9,  9, 38,
        38,  9, 38, 19,  9,  9, 

the top1 is:  tensor([ 4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,
         4,  4,  4,  4,  4,  4,  4, 16,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4,
         4,  4,  4,  4, 16,  4,  4,  4,  4, 19,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16, 16,
        16, 16, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4, 16,
         4, 16,  4,  4,  4, 19,  4,  4,  4,  4, 16,  4,  4,  4,  4, 16,  4, 16,
         4,  4,  4,  4,  4,  4,  4, 16, 19,  4, 16,  4,  4, 16,  4,  4,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9,  9,  9,  9,  9,  9,  9,  9,  9, 38,  9,  9,  9,  9,  9,  9, 19,  9,
         9,  9,  9, 38,  9,  9, 38, 19,  9,  9, 19, 38,  9,  9,  9, 14, 19, 38,
         9,  9,  9, 35, 30, 38,  9,  9,  9, 17, 38,  9,  9,  9,  9,  9,  9,  9,
        38,  9,  9,  9, 19,  9,  9,  9,  9, 38, 38,  9,  9,  9,  9,  9, 19, 19,
        19, 19, 19,  9, 38,  9, 

the top1 is:  tensor([ 4,  4, 19,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16, 16, 16, 16,
         4,  4, 16,  4,  4, 16, 16,  4,  4,  4, 16, 16,  4,  4,  4,  4,  4,  4,
        16,  4,  4,  4, 16,  4, 16,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4, 16,
         4,  4, 16,  4, 16,  4, 16,  4,  4, 16,  4,  4,  4, 16,  4,  4,  4,  4,
        19,  4,  4,  4, 16, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,
         4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4, 16,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4, 19,  4,  4,  4, 16,  4,  4,  4,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([  9,   9,  17,   9,  38,   9,   9,  19,  38,   9,   9,   9,   6,  19,
         19,  19,  17,  19,   9,   9,  30,   9,  35,  19,  19,   9,   9,   9,
         30,  19,  19,   9,  38,   9,  38,  38,  19,   9,  17,   9,  19,   9,
         19,  17,   9,   6, 115,   9,  19,   9,   9,  38,   9,  19,  38,   9,
         19,   9,  19,  14,  19,  38,  3

the top1 is:  tensor([ 4, 16, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4, 16,  4, 16,  4, 16,  4, 19,  4,  4, 16,  4, 16,  4, 16,  4, 16,  4,
         4,  4, 16,  4,  4,  4, 16,  4, 16,  4, 16, 16, 16,  4,  4,  4,  4,  4,
         4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4, 16,  4, 16, 16,  4,  4,  4, 16,  4, 16,  4,  4, 16,  4,  4,  4,  4,
         4,  4,  4, 16, 16, 16,  4,  4,  4,  4,  4, 16,  4,  4, 16, 16,  4,  4,
         4, 16], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9, 19, 19,  9,  9,  9,  9,  9,  9, 35, 38,  9,  9,  9,  9,  9,  9,  9,
        38,  9, 19,  9,  9, 38,  9, 38,  9,  9, 38, 38,  9, 38,  9,  9,  9,  9,
         9, 19,  9, 19,  9, 19,  9, 17, 38, 38, 19, 38, 19,  9, 19,  9, 19,  9,
         9,  9, 19, 38,  9,  9, 19, 38, 19,  9, 19, 19, 19,  9,  9,  9, 38, 38,
        38, 19,  9,  9, 38,  9, 

the top1 is:  tensor([ 4, 16,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4, 16,  4,  4,  4, 16,  4,
         4, 16,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4, 16,  4,  4, 16,  4, 16,  4,  4,  4,  4,  4,  4, 16,  4, 16,  4,
        16,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16, 16,  4,  4,  4,  4, 16, 16,
        16, 16, 16,  4,  4, 16, 16, 16, 16,  4, 16,  4,  4,  4,  4, 16,  4, 16,
        16, 16,  4,  4, 16, 16,  4,  4,  4,  4, 16,  4,  4, 16, 16,  4,  4, 16,
         4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4, 16,  4,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 38,  19,   9,   6,   9,   9,  35,  35,   9,  19,  38,   9,  19,   9,
          9,   9,  19,  38,   9,  19,   9,   9,   9,  19,   9,   9,   9,   9,
        106,   9,  35,  35,   9,  19,  38,   9,   9,   9,  19,   9,   9,  17,
         38,  19,   9,   9,   9,   9,   9,   9,  19,   9,  19,  38,  17,   9,
         38,   9,  19, 115,   6,   9,   

the top1 is:  tensor([ 4, 16,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4, 16,  4, 16, 16,  4, 16,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4, 16,
        16,  4,  4, 16,  4,  4, 16,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4, 16,
         4,  4, 16, 16,  4,  4,  4,  4, 16, 16, 16,  4,  4,  4,  4,  4, 16,  4,
         4, 16,  4,  4,  4,  4, 16,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4, 16,  4,  4,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9, 30,  9,  9, 38, 24,  9, 19,  9,  9, 24,  9, 30, 38, 30, 30,  9, 30,
         9, 14, 38,  9,  9, 24,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9, 14,  9,
         9, 38, 24,  9,  9,  9, 30,  9,  9, 24,  9,  9,  9,  9,  9, 30, 14, 30,
        19,  9, 38, 30, 38,  9, 30,  9, 38,  9, 19, 38, 38, 38,  9, 38,  9, 30,
        38,  9, 30, 30,  9, 38, 

the top1 is:  tensor([ 4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4, 16, 16,  4,  4,  4,  4,
         4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4, 16, 16,  4,  4, 16,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16, 16,  4,  4,  4,  4,  4,  4,
         4, 16, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4, 16,  4,  4,  4,  4, 16,  4, 16, 16,  4, 16, 16,  4,  4,  4,  4,
         4, 16,  4,  4,  4,  4, 16,  4,  4,  4,  4, 16,  4,  4,  4,  4, 16,  4,
         4, 16], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([  9,   9,   9,   9,  17,   9,  24,  38,  38,   9,   9,   4,  30,  30,
         14,   9,   6,   9,   9,   9,  30,   9,   9,   9,   9,   9,   9, 115,
          9,  30,  17,   9,  19,  17,   9,  17,  14, 115,  24,   9,   9,   6,
          9,   9,   9,   9,  30,  19,  14,  38,   9,   9,  17,  38,   9,  30,
         30,  38,   6,  17,  38,   6,  2

the top1 is:  tensor([ 4,  4, 16,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4, 16,
        16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4, 16, 16,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4, 16,  4,  4,
         4, 16,  4, 16,  4,  4, 16,  4,  4, 16,  4,  4, 16,  4,  4, 16,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4, 16,  4,
         4,  4, 16,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4, 16,  4, 16,  4,  4,  4,  4, 16, 16,  4,  4,  4,  4, 16, 16,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([  9,  14,  30,  14,  14, 115,  24,   6,  30,  38,   9,   9,   9,   6,
          9,   9,   9,  30,  17,   9,   9,  35,  24,  38,  14,   6,   9,   9,
         14,   6,   9,  17,   9,   6,   9,   9,   9,   9,  14,  30,  30,  14,
          6,   6,   9,   9,   9,  30,   9,   9,   9,  12,  38,  14,   9,  17,
          9,  30,  14,   9,  30,  35,  2

the top1 is:  tensor([16,  4,  4,  4,  4,  4, 16, 16,  4,  4, 16,  4,  4, 16,  4,  4,  4,  4,
         4,  4,  4, 16,  4,  4,  4,  4,  4,  4, 16, 16,  4,  4,  4,  4, 16, 16,
         4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4, 16,  4,  4, 16,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,
        16,  4, 16,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4, 16,  4,
         4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4, 16,  4,
         4,  4,  4,  4,  4, 16,  4,  4,  4, 16,  4,  4,  4,  4, 16,  4,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 30,  38,   6,   9,  35, 115,  38,  38,  38,  17,  30,  38,   9,  30,
          9, 115,  14,  35,  35,   9,  38,  30,  35,   9,   9,  14,  14,   9,
         30,  30,  35,   9, 115,   6,  19,  30,  17, 115,  14,  30,   9,  14,
         35,  35,  14,   6,   9,   9,   9,   9,  38,  17,   9,   9,   9,  35,
         14,   6,  17,   9,   9,  17,  2

the top1 is:  tensor([ 4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4, 16,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,
         4,  4, 16,  4,  4, 16, 16,  4, 16,  4,  4,  4, 16,  4,  4, 16,  4,  4,
        16,  4,  4,  4,  4, 16,  4,  4,  4, 16,  4,  4,  4, 16,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4, 16,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 38,  14,   9,   9,  19,   9,  14, 115,   9,  35,  14,  14,  14,   9,
          9,  14,  14,  17,  38,  30,  38,   9,  30,  35,   6,  14,   6,   9,
          9,   9,   9,  14,   9,   9,   9,   9,   9,  30,   9,   6,   9,   9,
         38,  35,  35,   9,   9,   9,   9,   9,  14,   9, 115,  38,   9,  38,
         10,  19,  30,  38,  24,  14,   

the top1 is:  tensor([ 4,  4, 16,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16, 16, 16,
         4, 16,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4, 16,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4, 16,  4,  4, 16,  4,  4,
         4,  4,  4,  4,  4,  4,  4, 16,  4, 16,  4, 16,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4, 16,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4, 16,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4, 16,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([38, 14, 30, 38, 38,  9, 14,  9, 14, 30,  9, 38, 38, 14, 14, 30, 30, 30,
         9, 30, 38,  9,  9, 30, 14, 14, 38, 38, 14, 30, 38, 30, 38, 38, 14, 14,
        24, 14, 38,  9, 14,  9,  9, 14, 14, 14, 30, 35, 30, 38,  9, 30,  9, 35,
        38,  9,  9,  9,  9, 38, 14, 30,  9, 30,  9, 30, 14, 14, 14,  9, 38, 14,
         9,  9,  9, 38,  9, 38, 

the top1 is:  tensor([16,  4,  4,  4,  4,  4, 16, 16, 16, 16,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4, 16,  4, 16, 16,  4,  4, 16, 16,  4, 16,  4, 16,  4,  4,  4,  4,
         4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4, 16, 16,  4,  4,  4,
         4, 16,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4, 16,  4, 16,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([30, 14, 24,  9,  9, 38, 30, 30, 30, 30,  9, 38, 38,  9, 24,  9,  9, 14,
         9, 38, 30,  9, 30, 30,  9,  9, 30, 30,  9, 30,  9, 30, 38, 14, 38, 38,
        14,  9,  9, 38, 30,  9, 38,  9, 38, 38, 14,  9,  9, 30, 30, 14,  9, 14,
        38, 30,  9, 14, 30, 38,  9, 38,  9, 38,  9, 14,  9,  9,  9,  9, 14, 38,
         9, 38,  9, 35,  9,  9, 

the top1 is:  tensor([16,  4, 16,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4, 16,  4, 16,  4,
         4,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4,
        16,  4,  4,  4,  4, 16,  4,  4, 16,  4,  4,  4,  4, 16,  4,  4,  4,  4,
        16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16, 16,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  7,  4,  4,  4, 16,  4, 16,  4, 16,  4, 16, 16,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
        16,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4, 16,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 17,   9,  30,  38,   9,  38,  30,  14,  14,   9,   9,   9,   9,  35,
         19,  24,  30,  38,  38,   6,   9,   9,  30,   9,   9, 115,   9,  38,
         30,   9,   6,  14,   9,   9,  30, 115,  30,   6,  38,   9,   9,  30,
          9,   9,  19,   9,   9,  38,  19,  30,   9,   9,  14,   9,  30,   9,
          6,   6,   9,   9,   9,  14,   

the top1 is:  tensor([ 4,  4,  4,  4,  4,  4,  4, 16,  4, 16,  4,  4,  4,  4,  4, 16,  4,  4,
         4, 16,  4,  4,  4, 16,  4, 16,  4,  4,  4,  4,  4, 16, 16,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4, 16, 16, 16,  4,  4,  4,  4,  4,  4, 16, 16,
        16, 16,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4, 16,  4,  4, 16,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4, 16,  4, 16, 16,  4,  4,  4, 16, 16,  4,
         4,  4, 16, 16, 16, 16,  4,  4, 16, 16,  4, 16, 16,  4, 16, 16,  4,  4,
        16,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 9,  9, 14,  9,  9, 38,  9, 30,  9, 30,  9,  9, 38,  9, 14, 30,  9, 38,
        38, 30, 38, 38, 38, 30, 24, 30,  9,  9, 38, 14, 14, 30, 30,  9, 38,  9,
         9,  9,  9,  9,  9,  9,  9, 30, 30, 30,  9,  9,  9,  9,  9,  9, 30, 30,
        30, 30,  9,  9,  9,  9, 30, 38, 14,  9,  9,  9, 30, 38,  9,  9,  9,  9,
        38,  9,  9,  9, 24, 30, 

the top1 is:  tensor([5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([3, 3, 3, 3, 5, 3, 3, 3, 3, 3, 3, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 5, 3, 3, 3, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 3, 3, 3, 3, 3, 3,
        3, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 3, 3, 5, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        5, 3, 3, 3, 3, 3, 3, 3], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor

the top1 is:  tensor([16,  4, 16,  4,  4,  4, 48,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4, 16,  4,  4,  4,  4, 16,  4, 16,  4,  4,  4,  4,  4,  4,  4, 16,  4,
         4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,
         4,  4,  4,  4,  4, 16, 16,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,
        16, 16,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 30,   9,  30,  35,   9,   9,  38,   9,   9,  38,  14,   9,   9,  24,
          9,  17, 233, 233,   9,  30,   9,   9,   9,   9, 112,   9,  30,  38,
          9,   9,  38,   9,   9,   9,  30,   9,  35,  30,  38,   9,  35,   9,
         14,   9,   9,   9,   9,   9,   9,   9,   9,   9,  19,   9,  38,   9,
          9,   9,   9,  30,  30,   9,  3

the top1 is:  tensor([16, 16, 16,  4, 16,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
        16, 16,  4, 16,  4,  4,  4,  4,  4,  4, 19,  4,  4,  4,  4, 16,  4,  4,
         4,  4,  4, 16, 16,  4,  4,  4, 48,  4,  4,  4,  4, 16,  4,  4,  4,  4,
         4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4, 16,  4,
        16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 48,  4,  4,  4, 16,  4,
        48, 16,  4,  4, 16,  4,  4, 48, 16, 16,  4,  4, 16, 16,  4,  4,  4,  4,
        16,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 30,  19,  30,   9,  30,  35,   9,  30,  38,   9,  14,  35,  38,   9,
          9,  14,  19, 233,  30,  30,  17,  30,   9,  38,   9,   9,   6,   9,
         19,   9,   6,   9,   9, 112,  38, 233,  38,   9,  14,  30,  30,   9,
          9,   9,  17,  19,  12,   9,   9,  30, 233,   9,   9,  38,  12,  30,
         38,  14,   9,  17,  38, 233,  2

the top1 is:  tensor([ 4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4, 19, 16,  4,  4, 16,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,
         4,  4,  4, 16, 16,  4,  4,  4,  4, 16, 16,  4,  4,  4,  4,  4,  4,  4,
         4, 16,  4, 16,  4, 16,  4,  4,  4,  4, 16, 16, 19,  4,  4,  4, 16,  4,
         4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4, 48, 16,  4,
         4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 38,   9,   9,  14,  38,   9,  24,  38,  38,   9,  38,  14,   9,   9,
         14,  14,  38,  38,  38,  38,  17,  19,   9,  38,  19,   9,   9,  19,
          9,   9,   9,  38,   9,   9,  14,   9,   9,  38,  24,   9,  30,   9,
         24,  35,  38,  38,  38,  24,   9,  30,  38,  14,  38,  14,   9,   9,
          9,  19,  19,   9,  38,  38,   

the top1 is:  tensor([ 4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,
         4,  4, 16,  4, 16,  4,  4, 16,  4, 16,  4,  4,  4,  4,  4, 16,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,
         7,  4,  4,  4, 16, 16,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4],
       device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([  9,   9,  19,   9,  38,   9,  14,   9,   9,  38,  24,   9,  30,  38,
          9,  14,   9,  14,   9,  38,  30,  38,  30,  38,  14,  19,   9,  19,
         14,  14,   9,  14,  38, 112,  24,   9,   9,   9,  14,   9,  14,  14,
          9,   9,   9,   9,  35,   9,  30,   9,   9,   9,  35,   9,  19,  38,
         38,   9,  30,  19,   9,   9,  38,   9,  14,   9,   9,   9,  19,   9,
         14,  38], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([10, 10, 38,  6, 17,  6,  6,  6,  6, 12,  6,  6, 17, 12,  6, 12,  6,  6,
         6, 12,  6,  6, 17,  6,  6, 17,  6, 17,  6, 14,

the top1 is:  tensor([ 4,  4, 16,  4,  4, 16,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4, 16, 16,
         4,  4,  4,  7,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,  4,  4, 16,
         4, 16,  4,  4,  4,  4,  4,  4, 16,  4, 16, 16,  4,  4,  4,  4, 16,  4,
        16,  4,  4,  4, 16,  4,  4,  4,  4,  4, 16,  4, 16, 16,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4, 16, 16,  4, 16,  4, 16,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4, 16, 16,  4, 16,  4,  4,  4, 16,  4, 16,  4, 16,  4, 16,  4,  4,  4,
         4,  4], device='cuda:0', grad_fn=<NotImplemented>)
the top1 is:  tensor([ 10,   9,  19,   9,  12,  19,  38,  35,  30,   6,   9,  38, 233,   9,
         38,  14,  30,  19,   6,  17,  14,  14,   9,   9,   9,  38,   6,  17,
         14,   9,   9,  19,  14,  38,  14, 112,   9,  30,   9,  17,   9,  38,
         14,  35,  19,  38,  19,  19,   9,   9,  17,   9,  19,   9,  19,   9,
         14,  38, 112,   9,  14,   9,   

the top1 is:  tensor([ 4,  4,  4,  4,  4,  4, 16,  4, 16,  4,  4, 16,  4, 16,  4,  4,  4,  4,
        16,  4, 16, 16,  4,  4, 16,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4, 16,
         4, 16, 16,  4, 16,  4,  4, 16,  4,  4,  4,  4, 16,  4,  4,  4,  4,  4,
         4, 16,  4,  4, 16, 16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16,  4,
         4, 16, 16, 16,  4, 16,  4,  4,  4, 16,  4, 16,  4, 16, 16,  4,  4,  4,
         4,  4,  4, 16,  4,  4,  4, 16, 16,  4, 16, 16, 16,  4,  4,  4, 16,  4,
         4,  4, 16, 16, 16,  4,  4,  4,  4,  4,  4,  4,  4, 16, 16, 16,  4,  4,
        16,  4], device='cuda:0')
the top1 is:  tensor([  9,   9,   9,   9,   9,   9,  30,  38,  19,  14,  38,  30,   9,  30,
         14,  38,   9,  14,  30,  38,  30,  30,   9,  14,  30,   9,   9,  14,
          9,  38,  14,  35,  38,  19,  38,  30,  38,  30,  30,  14,  30,   9,
          9,  30,  14,  38,  38,  38,  30,  38,   9,  38,   9,  35,  35,  30,
         14,   9,  30,  30,   9,   9,  38,  38,   9,  24,  14,   9

the top1 is:  tensor([ 3,  3,  5,  4,  5,  3,  3,  3,  3,  3,  4,  4,  5,  4,  5,  4,  5,  3,
         3,  5,  3,  3,  5,  3,  3,  3,  5,  4,  3,  5,  3,  3,  3,  3,  5,  4,
         5, 28,  4,  0,  5,  4,  5,  5,  0,  5,  5,  3,  5,  5,  3,  4,  5,  0,
         3,  4,  3,  4,  3,  4,  3,  4,  4,  4,  5,  4,  5,  4,  4,  4,  3,  3,
         4,  3,  3,  3,  3, 15,  5,  3,  3,  5,  4,  4,  5,  4,  5,  3,  3,  3,
         3,  3,  4,  3,  4,  5,  3,  3,  3,  4,  3,  4,  3,  3,  5,  3,  4,  0,
         3,  3,  3,  3,  3,  3,  3,  3,  5,  5,  5,  5,  3,  3,  3,  5,  5,  3,
         7,  3], device='cuda:0')
the top1 is:  tensor([ 3,  3,  3,  5,  3,  3,  3,  3,  3,  3,  4,  5,  3,  5,  3,  5,  3,  3,
         3,  3,  3,  3,  3,  3,  3,  3,  3,  4,  3,  3,  3,  3,  3,  3,  3,  5,
         3,  4,  0,  5,  3,  5,  3,  3,  5,  3,  3,  3,  3,  3,  3,  4,  3,  5,
         3,  4,  3,  4,  3,  5,  3,  4,  4,  4,  3,  5,  3,  4,  4,  4,  3,  3,
         4,  3,  3,  3,  3, 11,  3,  3,  3,  3,  4,  5,  3

the top1 is:  tensor([ 4,  4,  4, 11,  4,  4,  4,  4,  4,  5,  5,  0,  3,  4,  4,  3,  3,  5,
         5,  4,  4,  3,  4,  4, 11,  0,  5,  3,  4,  4,  3,  4,  3,  3,  5,  3,
         3,  3,  5,  3,  3,  3,  5,  3,  4,  3,  5,  3,  4,  3,  3,  3,  5,  3,
         3,  3,  4,  4,  3,  3,  3,  0,  3,  3,  5,  3,  3,  3,  4,  0,  3,  4,
         3,  4,  3,  5,  3,  4,  5, 11,  4,  3,  3, 15,  3,  3,  3,  3,  5,  0,
         0,  3,  5,  3,  3,  5,  5,  0,  3,  3,  3,  3, 15,  3,  3,  4,  3,  3,
         3,  5,  3,  5,  3,  5,  4,  4,  5,  3,  5,  3,  3,  3,  5,  4,  5,  3,
         3,  4], device='cuda:0')
the top1 is:  tensor([ 4,  4,  4,  4,  5,  5,  4,  4,  0,  3,  3,  5,  3,  5,  4,  3,  3,  3,
         3,  4,  4,  3,  4,  4,  4,  5,  3,  3,  5, 11,  3,  4,  3,  3,  3,  3,
         3,  3,  3,  3,  3,  3,  3,  3,  5,  3,  3,  3,  5,  3,  3,  3,  3,  3,
         3,  3,  4,  5,  3,  3,  3,  5,  3,  3,  3,  3,  3,  3,  0,  5,  3,  4,
         3,  4,  3,  3,  3,  4,  3,  4,  4,  3,  3, 11,  3

the top1 is:  tensor([ 4, 23, 11,  4,  4,  4, 23, 11, 36,  4, 11, 11, 11, 23,  4, 11, 23, 23,
         4,  4, 23,  4, 11, 11,  6, 11,  4,  4,  6,  4,  4,  4,  4, 11,  4,  4,
         4, 23,  4, 23, 10,  4,  4,  4, 11, 23,  4,  4, 23, 11,  4,  4, 23,  4,
         4,  4,  4,  4,  4,  4,  0,  4, 17,  4,  4,  4,  4,  4,  4,  4, 11,  4,
         4,  4,  4,  4,  8,  6,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 10, 11,
         4, 23,  4,  4,  4,  6,  8,  4, 18,  4,  4,  4,  6, 23, 11, 23,  4, 11,
         4, 23, 11,  4, 23, 11,  4, 23, 23,  4, 23, 11,  6, 11,  4,  4, 11,  4,
         4, 23], device='cuda:0')
the top1 is:  tensor([ 4, 10,  4,  4,  4,  0, 10,  4,  8,  4,  4,  4,  4, 11,  4,  4, 11, 11,
         4,  4, 11,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4, 11,  4, 11, 36,  4,  4,  4,  4, 11,  4,  4, 10,  4,  4,  4, 11,  4,
         4,  4,  4,  4,  0,  4,  6,  4,  6,  4,  4,  4, 11,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4, 11,  4,  4,  4

the top1 is:  tensor([ 4,  4,  4,  4,  4, 11, 11,  4,  4, 23, 11,  4,  4, 11,  4,  4,  4,  4,
        11,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 28,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 11,  4,  0, 11,  0,
         4,  4, 39,  4,  4, 11,  4,  4,  4,  4,  4,  4,  4, 39,  4,  4,  4,  4,
        11,  4,  4,  5, 39,  7,  4,  5,  4,  5,  5,  4,  4,  4,  4,  4, 11,  4,
         0,  4,  4,  4,  4, 11,  4,  5,  4,  4,  7,  4,  4,  4, 11,  4,  4,  4,
         5,  4,  4,  4,  4,  4,  4,  4,  4,  5], device='cuda:0')
the top1 is:  tensor([ 0,  4,  4,  4,  4,  4,  4,  4,  4, 11,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  5,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
        11,  4,  4,  4,  4,  4,  5,  4,  4,  7,  4,  4,  4,  4,  4, 15,  4, 28,
         4,  4,  5,  4,  4,  4,  4,  4,  5,  4,  5,  0,  4,  5,  4,  4,  4,  0,
         4,  5,  4,  3,  5, 39,  4,  3,  4,  3,  3,  4,  5,  4,  5,  5,  4,  5,
         5,  4,  4,  5,  4

NameError: name 'epoch_mins' is not defined