In [1]:
import importlib

import model
import pan_loader
import base_config
import loss_functions as L

import utils

import torch
import torch.nn as nn
import torch.optim as optim

import os
import time
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"


In [2]:
data_dir = "/home/aravind/dataset/"
ann_dir = data_dir + "annotations/panoptic/"

train_img_dir = data_dir + "train2017/"
train_seg_dir = ann_dir + "panoptic_train2017/"
train_ann_json = ann_dir + "panoptic_train2017.json"

val_img_dir = data_dir + "val2017/"
val_seg_dir = ann_dir + "panoptic_val2017/"
val_ann_json = ann_dir + "panoptic_val2017.json"

# train_img_dir = val_img_dir 
# train_seg_dir = val_seg_dir 
# train_ann_json = val_ann_json 

In [3]:
with open(val_ann_json,"r") as f:
    val_ann = json.load(f)
with open(train_ann_json,"r") as f:
    train_ann = json.load(f)

In [4]:
config = base_config.Config()

In [5]:
train_loader = pan_loader.get_loader(train_img_dir, train_seg_dir, train_ann, config)
val_loader = pan_loader.get_loader(val_img_dir, val_seg_dir, val_ann, config)

In [7]:
net = model.hgmodel()
# model_dir="models/"
# model_name="first_0.pt"
# pretrained_dict = torch.load(model_dir+model_name)
# net_dict = net.state_dict()

# pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in net_dict}
# net_dict.update(pretrained_dict) 
# net.load_state_dict(net_dict)


In [8]:
def set_trainable(module,state):
    for param in module.parameters():
        param.requires_grad = state

set_trainable(net,False)

set_trainable(net.mb0,True)
set_trainable(net.mb1,True)
set_trainable(net.cb,True)

set_trainable(net.iresnet0, False)
set_trainable(net.iresnet1, False)

for name,module in net.iresnet0.named_modules():
    if 'copy' in name:
        set_trainable(module, True)
    if isinstance(module,nn.BatchNorm2d):
        set_trainable(module, False)
        module.eval()
        
param_lr = []
param_lr.append({'params': net.mb0.parameters(),'lr':1e-3,'momentum':0.9})
param_lr.append({'params': net.mb1.parameters(),'lr':1e-3,'momentum':0.9})
param_lr.append({'params': net.cb.parameters(),'lr':1e-3,'momentum':0.9})

for name,module in net.iresnet0.named_modules():
    if 'copy' in name:
        param_lr.append({'params':module.parameters(),'lr':1e-3,'momentum':0.9})

param_lr = [{'params':net.parameters(),'lr':1e-3, 'momentum':0.9}]
net_size = sum([i.numel() for i in net.parameters()])
trainable_params = filter(lambda p: p.requires_grad, net.parameters())
trainable_size = sum([i.numel() for i in trainable_params])
print(net_size,trainable_size)
print(param_lr)
optimizer = optim.SGD(param_lr)
# scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=1, patience=10, verbose=True, threshold=0.0001, threshold_mode='rel', cooldown=100, min_lr=1e-2, eps=1e-08)

111115750 59594934
[{'params': <generator object Module.parameters at 0x7f5dccb8fd58>, 'lr': 0.001, 'momentum': 0.9}]


In [9]:
# net = nn.DataParallel(net, device_ids=[0,1])
net = net.cuda()

In [10]:
ckpt = utils.Checkpoint(iters_per_epoch=60, model_dir="./models/", model_name="first")

for i, data in enumerate(train_loader,0):
#     print("batch %d:"%i)
    optimizer.zero_grad()
    
    images, impulses, instance_masks, cat_ids = utils.cudify_data(data)
    del(data)
    outs = net([images,impulses])
    del(images, impulses)
    loss = L.loss_criterion1(outs, [instance_masks, cat_ids])
    del(instance_masks, cat_ids, outs)
    ckpt.update(loss.data, net)
    loss.backward()
    del(loss)
    optimizer.step()

torch.Size([3, 1, 448, 448]) torch.Size([3, 1, 448, 448]) torch.Size([3]) torch.Size([3, 134])
tensor(0.9017, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.9058, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9839, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.8962, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8993, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.8676, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9559, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.9290, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([3, 1, 448, 448]) torch.Size([3, 1, 448, 448]) torch.Size([3]) torch.Size([3, 134])
tensor(0.9429, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.639

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9017, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.3438, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([2, 1, 448, 448]) torch.Size([2, 1, 448, 448]) torch.Size([2]) torch.Size([2, 134])
tensor(0.8475, device='cuda:0', grad_fn=<MeanBackward1>) tensor(5.3110, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8666, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.7820, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9163, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.5748, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9673, device='cuda:0', grad_fn=<MeanBackward1>) tensor(5.908

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9286, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.1231, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9121, device='cuda:0', grad_fn=<MeanBackward1>) tensor(5.0938, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9489, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.6925, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8682, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.6252, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9165, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.451

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8985, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.9202, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9191, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.8911, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9072, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.7845, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8869, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.8893, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9115, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.041

tensor(0.9284, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.6460, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9093, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.9284, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9209, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.6707, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8997, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.9727, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9364, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.1010, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8999, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.8741, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([3, 1, 448, 448]) torch.Size([3, 1, 448, 448]) torch.Size([3]) torch.Size([3, 134])
tensor(0.9124, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.6012, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9207, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.1995, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9300, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.4356, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9197, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.198

tensor(0.9364, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.7535, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9245, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.4083, device='cuda:0', grad_fn=<MeanBackward1>)
tensor(4.0846, device='cuda:0')
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9161, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.3792, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9097, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.8276, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9176, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.8830, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 

tensor(0.8964, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.9611, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([3, 1, 448, 448]) torch.Size([3, 1, 448, 448]) torch.Size([3]) torch.Size([3, 134])
tensor(0.9375, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.3322, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9329, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.1322, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9158, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.7953, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9424, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.1110, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9136, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.5034, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8633, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.9279, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9425, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.4047, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9015, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.0832, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9053, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.431

tensor(0.9079, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.3314, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9041, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.2946, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9224, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.4356, device='cuda:0', grad_fn=<MeanBackward1>)
tensor(4.1028, device='cuda:0')
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9136, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.7531, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8975, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.0995, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 

tensor(0.9198, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.2435, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9484, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.7776, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9358, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.6121, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9048, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.9670, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8523, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.5602, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9139, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.6121, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8987, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.1920, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9412, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.6357, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9188, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.5047, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9171, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.658

tensor(0.8998, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.7696, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8950, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.0871, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9887, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.8270, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9040, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.3160, device='cuda:0', grad_fn=<MeanBackward1>)
tensor(3.6889, device='cuda:0')
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9065, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.3947, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9155, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.6615, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9200, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.2467, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9160, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.3254, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9169, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.2978, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9085, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.706

tensor(0.9057, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.3718, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([2, 1, 448, 448]) torch.Size([2, 1, 448, 448]) torch.Size([2]) torch.Size([2, 134])
tensor(0.9191, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.2857, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9166, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.3793, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9465, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.6222, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8564, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.3234, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9203, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.5391, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9146, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.5359, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8994, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.8602, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9103, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.0981, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9351, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.140

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9124, device='cuda:0', grad_fn=<MeanBackward1>) tensor(5.1154, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9071, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.1621, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9116, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.9164, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9342, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.5175, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9523, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.086

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9506, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.6762, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9134, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.8206, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9159, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.9826, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9051, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.5917, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([3, 1, 448, 448]) torch.Size([3, 1, 448, 448]) torch.Size([3]) torch.Size([3, 134])
tensor(0.8965, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.477

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9234, device='cuda:0', grad_fn=<MeanBackward1>) tensor(1.9997, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8982, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.1461, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9074, device='cuda:0', grad_fn=<MeanBackward1>) tensor(1.6864, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9133, device='cuda:0', grad_fn=<MeanBackward1>) tensor(1.0804, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9370, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.115

torch.Size([1, 1, 448, 448]) torch.Size([1, 1, 448, 448]) torch.Size([1]) torch.Size([1, 134])
tensor(0.9921, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.4855, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9127, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.9587, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9182, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.1242, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9455, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.0407, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9123, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.577

tensor(0.9199, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.1679, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9285, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.6971, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9528, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.5972, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9226, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.8887, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8974, device='cuda:0', grad_fn=<MeanBackward1>) tensor(1.7286, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9356, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.9609, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9390, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.1009, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8948, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.9130, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9083, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.8611, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9248, device='cuda:0', grad_fn=<MeanBackward1>) tensor(1.741

tensor(0.9455, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.9175, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9294, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.7920, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9521, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.0755, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9433, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.6285, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9034, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.3772, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9263, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.0066, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9196, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.8293, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9128, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.9101, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9230, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.8447, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9386, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.261

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9243, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.6579, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9173, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.0161, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9104, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.0587, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8886, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.1808, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9335, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.085

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9231, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.9156, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9071, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.4785, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9356, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.6915, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9715, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.5124, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9162, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.045

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9341, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.5053, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([3, 1, 448, 448]) torch.Size([3, 1, 448, 448]) torch.Size([3]) torch.Size([3, 134])
tensor(0.9062, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.7954, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9249, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.6523, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9179, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.4704, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9311, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.942

tensor(0.9346, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.7186, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9100, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.0369, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([3, 1, 448, 448]) torch.Size([3, 1, 448, 448]) torch.Size([3]) torch.Size([3, 134])
tensor(0.8900, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.0750, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9335, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.1225, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9412, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.2111, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([3, 1, 448, 448]) torch.Size([3, 1, 448

tensor(0.9045, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.1442, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9104, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.2029, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9461, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.4900, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9257, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.3816, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([3, 1, 448, 448]) torch.Size([3, 1, 448, 448]) torch.Size([3]) torch.Size([3, 134])
tensor(0.9621, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.6336, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448

tensor(0.9143, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.3083, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8832, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.3849, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9097, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.1021, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9445, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.4200, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8968, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.0648, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448

tensor(0.9167, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.1473, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9245, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.8861, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9127, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.0762, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([2, 1, 448, 448]) torch.Size([2, 1, 448, 448]) torch.Size([2]) torch.Size([2, 134])
tensor(0.8694, device='cuda:0', grad_fn=<MeanBackward1>) tensor(6.8575, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8834, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.0702, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448

tensor(0.9139, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.5123, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9438, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.0978, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([3, 1, 448, 448]) torch.Size([3, 1, 448, 448]) torch.Size([3]) torch.Size([3, 134])
tensor(0.9193, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.8432, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9206, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.2259, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9144, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.5166, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9291, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.8247, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9163, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.7007, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9230, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.8299, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9724, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.8613, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9151, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.129

tensor(0.9033, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.3947, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9071, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.9392, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9085, device='cuda:0', grad_fn=<MeanBackward1>) tensor(1.8156, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([3, 1, 448, 448]) torch.Size([3, 1, 448, 448]) torch.Size([3]) torch.Size([3, 134])
tensor(0.9189, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.1567, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8984, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.0025, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9657, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.1411, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9328, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.5407, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8956, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.2498, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9048, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.4048, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([3, 1, 448, 448]) torch.Size([3, 1, 448, 448]) torch.Size([3]) torch.Size([3, 134])
tensor(0.8660, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.054

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9401, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.0758, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8805, device='cuda:0', grad_fn=<MeanBackward1>) tensor(5.4509, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8937, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.6732, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9486, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.8951, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8827, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.090

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9092, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.4100, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9551, device='cuda:0', grad_fn=<MeanBackward1>) tensor(1.2972, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9122, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.0326, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9406, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.6514, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9190, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.190

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9120, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.9417, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9254, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.7525, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9417, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.4163, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9351, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.5042, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([3, 1, 448, 448]) torch.Size([3, 1, 448, 448]) torch.Size([3]) torch.Size([3, 134])
tensor(0.9074, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.362

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9199, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.9949, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9218, device='cuda:0', grad_fn=<MeanBackward1>) tensor(1.9891, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9181, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.7617, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9260, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.5766, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9365, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.974

tensor(0.8874, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.5803, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([3, 1, 448, 448]) torch.Size([3, 1, 448, 448]) torch.Size([3]) torch.Size([3, 134])
tensor(0.8413, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.0140, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8678, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.5067, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9217, device='cuda:0', grad_fn=<MeanBackward1>) tensor(1.7645, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8919, device='cuda:0', grad_fn=<MeanBackward1>) tensor(1.8156, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448

torch.Size([2, 1, 448, 448]) torch.Size([2, 1, 448, 448]) torch.Size([2]) torch.Size([2, 134])
tensor(0.8658, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.3520, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9143, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.0640, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9082, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.2848, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8987, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.7268, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([3, 1, 448, 448]) torch.Size([3, 1, 448, 448]) torch.Size([3]) torch.Size([3, 134])
tensor(0.9040, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.184

tensor(0.9536, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.9904, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9052, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.5233, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9069, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.4987, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9360, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.6349, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9198, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.5273, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448

tensor(0.9469, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.0375, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9587, device='cuda:0', grad_fn=<MeanBackward1>) tensor(5.2833, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9012, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.9956, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9100, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.7226, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9430, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.9173, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9260, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.7936, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9083, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.6307, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9311, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.4258, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8902, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.6904, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9279, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.327

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8970, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.3016, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8896, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.5689, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8942, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.7608, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9264, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.7257, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9240, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.023

tensor(0.9556, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.7283, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([2, 1, 448, 448]) torch.Size([2, 1, 448, 448]) torch.Size([2]) torch.Size([2, 134])
tensor(0.8517, device='cuda:0', grad_fn=<MeanBackward1>) tensor(1.1163, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9393, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.8627, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9385, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.2523, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9016, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.7877, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448

tensor(0.9288, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.6043, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9150, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.8408, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9205, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.1871, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9570, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.5387, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([1, 1, 448, 448]) torch.Size([1, 1, 448, 448]) torch.Size([1]) torch.Size([1, 134])
tensor(0.8747, device='cuda:0', grad_fn=<MeanBackward1>) tensor(1.2591, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9287, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.7939, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9230, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.1559, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9069, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.2925, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([3, 1, 448, 448]) torch.Size([3, 1, 448, 448]) torch.Size([3]) torch.Size([3, 134])
tensor(0.8691, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.4447, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9019, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.899

tensor(0.9081, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.2054, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9195, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.9640, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9296, device='cuda:0', grad_fn=<MeanBackward1>) tensor(1.5055, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9514, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.7058, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9236, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.5759, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9345, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.0353, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8969, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.8874, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9202, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.8651, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9598, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.7880, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9304, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.166

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9173, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.0855, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9002, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.8476, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9101, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.6594, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9254, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.2822, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9191, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.574

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9106, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.3870, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9236, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.3831, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9854, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.4115, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9195, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.4422, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8959, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.109

tensor(0.8690, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.1373, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9433, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.8958, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9129, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.0031, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8930, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.5432, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9247, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.4636, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9245, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.0650, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9191, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.9629, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9018, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.6619, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8978, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.9712, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8860, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.675

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9175, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.3610, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9375, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.4874, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9243, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.8933, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9167, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.5687, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9244, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.869

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9034, device='cuda:0', grad_fn=<MeanBackward1>) tensor(1.9735, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8606, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.1782, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9339, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.0677, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9234, device='cuda:0', grad_fn=<MeanBackward1>) tensor(1.9724, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8960, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.565

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9331, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.4876, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9140, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.7462, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9178, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.3242, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9011, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.6969, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9197, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.749

tensor(0.8998, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.7004, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9476, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.8715, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8754, device='cuda:0', grad_fn=<MeanBackward1>) tensor(1.4574, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9234, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.0040, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9376, device='cuda:0', grad_fn=<MeanBackward1>) tensor(1.5438, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448

tensor(0.9709, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.3241, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9223, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.8865, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9232, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.6406, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9292, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.7914, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9063, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.4338, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([1, 1, 448, 448]) torch.Size([1, 1, 448

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9197, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.2743, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8941, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.5077, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8877, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.4355, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9195, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.8278, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9080, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.227

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9199, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.1768, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([3, 1, 448, 448]) torch.Size([3, 1, 448, 448]) torch.Size([3]) torch.Size([3, 134])
tensor(0.8952, device='cuda:0', grad_fn=<MeanBackward1>) tensor(1.9770, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9218, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.3366, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9306, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.5078, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9281, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.536

tensor(0.9233, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.8700, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9037, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.9047, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9367, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.8074, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9142, device='cuda:0', grad_fn=<MeanBackward1>) tensor(1.8800, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9406, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.3961, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9306, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.4870, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9091, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.0254, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([1, 1, 448, 448]) torch.Size([1, 1, 448, 448]) torch.Size([1]) torch.Size([1, 134])
tensor(0.8330, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.9451, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9286, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.5646, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9217, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.575

tensor(0.9095, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.0270, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9012, device='cuda:0', grad_fn=<MeanBackward1>) tensor(1.7932, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9267, device='cuda:0', grad_fn=<MeanBackward1>) tensor(0.9938, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9079, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.3635, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([3, 1, 448, 448]) torch.Size([3, 1, 448, 448]) torch.Size([3]) torch.Size([3, 134])
tensor(0.8842, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.9584, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9190, device='cuda:0', grad_fn=<MeanBackward1>) tensor(1.5837, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9096, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.0186, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9323, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.9398, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9220, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.9025, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9166, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.273

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9343, device='cuda:0', grad_fn=<MeanBackward1>) tensor(1.6235, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([3, 1, 448, 448]) torch.Size([3, 1, 448, 448]) torch.Size([3]) torch.Size([3, 134])
tensor(0.8832, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.2451, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9575, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.6072, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9228, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.5504, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9573, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.430

tensor(0.9311, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.0956, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9024, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.9317, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9264, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.8268, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8872, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.2899, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9368, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.9625, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9071, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.0041, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9231, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.0231, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9549, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.2329, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9119, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.7602, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8940, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.006

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9563, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.2394, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8946, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.5416, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([2, 1, 448, 448]) torch.Size([2, 1, 448, 448]) torch.Size([2]) torch.Size([2, 134])
tensor(0.8770, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.4866, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9304, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.8225, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8954, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.924

tensor(0.8977, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.6961, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9062, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.3168, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9307, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.2367, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9215, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.8860, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([2, 1, 448, 448]) torch.Size([2, 1, 448, 448]) torch.Size([2]) torch.Size([2, 134])
tensor(0.9703, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.9597, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448

tensor(0.9251, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.0545, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9184, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.8321, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8634, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.3150, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9110, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.5392, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9263, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.8292, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448

tensor(0.9233, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.0237, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9083, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.9130, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8927, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.8063, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8993, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.8563, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9095, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.0706, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448

tensor(0.9166, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.4948, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([2, 1, 448, 448]) torch.Size([2, 1, 448, 448]) torch.Size([2]) torch.Size([2, 134])
tensor(0.8983, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.3403, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9067, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.4483, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9290, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.3115, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([3, 1, 448, 448]) torch.Size([3, 1, 448, 448]) torch.Size([3]) torch.Size([3, 134])
tensor(0.8947, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.7137, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8879, device='cuda:0', grad_fn=<MeanBackward1>) tensor(1.7736, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9016, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.5323, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9239, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.0401, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9003, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.1242, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9231, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.640

torch.Size([3, 1, 448, 448]) torch.Size([3, 1, 448, 448]) torch.Size([3]) torch.Size([3, 134])
tensor(0.8889, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.0605, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9231, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.5919, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8667, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.4864, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9176, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.7387, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8869, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.346

tensor(0.9279, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.1695, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9119, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.8566, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9330, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.9758, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9199, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.0470, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9363, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.2278, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9537, device='cuda:0', grad_fn=<MeanBackward1>) tensor(1.9248, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9104, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.2724, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9560, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.3317, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8920, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.0020, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9570, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.117

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9109, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.6144, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9247, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.1582, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9355, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.1236, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9228, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.2882, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([3, 1, 448, 448]) torch.Size([3, 1, 448, 448]) torch.Size([3]) torch.Size([3, 134])
tensor(0.8773, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.212

tensor(0.8814, device='cuda:0', grad_fn=<MeanBackward1>) tensor(0.8062, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9208, device='cuda:0', grad_fn=<MeanBackward1>) tensor(1.7688, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9045, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.7211, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9352, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.2951, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9324, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.1506, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9135, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.2854, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8985, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.1956, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9211, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.3509, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8842, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.6306, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9150, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.071

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9096, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.4478, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([1, 1, 448, 448]) torch.Size([1, 1, 448, 448]) torch.Size([1]) torch.Size([1, 134])
tensor(0.7781, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.6965, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9058, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.6711, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9418, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.6559, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9228, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.298

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9284, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.3116, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8779, device='cuda:0', grad_fn=<MeanBackward1>) tensor(1.6527, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8926, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.1638, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([3, 1, 448, 448]) torch.Size([3, 1, 448, 448]) torch.Size([3]) torch.Size([3, 134])
tensor(0.8692, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.4968, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9064, device='cuda:0', grad_fn=<MeanBackward1>) tensor(1.932

tensor(0.9393, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.9004, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8937, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.2141, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([1, 1, 448, 448]) torch.Size([1, 1, 448, 448]) torch.Size([1]) torch.Size([1, 134])
tensor(0.9104, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.5199, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9090, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.8164, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8945, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.5308, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9253, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.9646, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9033, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.1907, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9312, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.5425, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9352, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.9604, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9118, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.647

tensor(0.9297, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.8759, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9038, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.8325, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9008, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.0617, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9202, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.0789, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9131, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.9234, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8954, device='cuda:0', grad_fn=<MeanBackward1>) tensor(1.4476, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9230, device='cuda:0', grad_fn=<MeanBackward1>) tensor(1.8714, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9375, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.4100, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9411, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.5636, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9343, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.294

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9238, device='cuda:0', grad_fn=<MeanBackward1>) tensor(1.3758, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([2, 1, 448, 448]) torch.Size([2, 1, 448, 448]) torch.Size([2]) torch.Size([2, 134])
tensor(0.9107, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.8917, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9195, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.0397, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8903, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.5378, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9331, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.546

tensor(0.8975, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.2873, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9286, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.0492, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9057, device='cuda:0', grad_fn=<MeanBackward1>) tensor(1.9004, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9372, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.1607, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9129, device='cuda:0', grad_fn=<MeanBackward1>) tensor(1.6983, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([2, 1, 448, 448]) torch.Size([2, 1, 448

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9132, device='cuda:0', grad_fn=<MeanBackward1>) tensor(1.6396, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9040, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.0236, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9249, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.5178, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9028, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.1118, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8822, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.702

tensor(0.8785, device='cuda:0', grad_fn=<MeanBackward1>) tensor(1.6564, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9307, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.5089, device='cuda:0', grad_fn=<MeanBackward1>)
tensor(2.9181, device='cuda:0')
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8829, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.7343, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9067, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.2406, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8965, device='cuda:0', grad_fn=<MeanBackward1>) tensor(1.9962, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 

tensor(0.9225, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.3459, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([2, 1, 448, 448]) torch.Size([2, 1, 448, 448]) torch.Size([2]) torch.Size([2, 134])
tensor(0.9152, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.3643, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9543, device='cuda:0', grad_fn=<MeanBackward1>) tensor(1.9430, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9014, device='cuda:0', grad_fn=<MeanBackward1>) tensor(1.7694, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9377, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.8580, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9027, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.7282, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9111, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.3923, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9092, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.7882, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9363, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.1713, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9089, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.319

tensor(0.9261, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.0488, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8840, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.0634, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8867, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.2121, device='cuda:0', grad_fn=<MeanBackward1>)
tensor(2.9572, device='cuda:0')
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9383, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.2030, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9424, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.8487, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 

torch.Size([3, 1, 448, 448]) torch.Size([3, 1, 448, 448]) torch.Size([3]) torch.Size([3, 134])
tensor(0.8927, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.1685, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9459, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.7118, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9346, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.7651, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([3, 1, 448, 448]) torch.Size([3, 1, 448, 448]) torch.Size([3]) torch.Size([3, 134])
tensor(0.9596, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.9821, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9585, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.113

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9358, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.0397, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9120, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.9994, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9276, device='cuda:0', grad_fn=<MeanBackward1>) tensor(1.3567, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9138, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.0046, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([3, 1, 448, 448]) torch.Size([3, 1, 448, 448]) torch.Size([3]) torch.Size([3, 134])
tensor(0.9136, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.506

tensor(0.9210, device='cuda:0', grad_fn=<MeanBackward1>) tensor(1.9646, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9299, device='cuda:0', grad_fn=<MeanBackward1>) tensor(4.2326, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9453, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.9389, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9058, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.3335, device='cuda:0', grad_fn=<MeanBackward1>)
tensor(2.9273, device='cuda:0')
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8999, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.3469, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 

tensor(0.8477, device='cuda:0', grad_fn=<MeanBackward1>) tensor(0.9904, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9215, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.5825, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9214, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.2519, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9213, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.7663, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9036, device='cuda:0', grad_fn=<MeanBackward1>) tensor(1.9150, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448

torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8830, device='cuda:0', grad_fn=<MeanBackward1>) tensor(1.8147, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9862, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.1389, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9217, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.8838, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.8658, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.9012, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9305, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.257

tensor(0.8211, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.1900, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9192, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.5549, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9159, device='cuda:0', grad_fn=<MeanBackward1>) tensor(3.8464, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9391, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.8891, device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([4, 1, 448, 448]) torch.Size([4, 1, 448, 448]) torch.Size([4]) torch.Size([4, 134])
tensor(0.9204, device='cuda:0', grad_fn=<MeanBackward1>) tensor(2.0594, device='cuda:0', grad_fn=<MeanBackward1>)
tensor(2.8650, device='cuda:0')
torch.Size([4, 1, 

RuntimeError: expected a non-empty list of Tensors