In [1]:
import os
import glob
import pickle
import numpy as np
from tqdm import tqdm
import torch.nn as nn
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import euclidean_distances

import sys, string, random
from datetime import datetime
from omegaconf import OmegaConf
import wandb
import torch
import lightning as L
from lightning.pytorch.loggers import WandbLogger
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from torch.utils.data.sampler import Sampler, SubsetRandomSampler
from monai.networks.nets import UNet

sys.path.append('../')
from data_utils import MNMv2DataModule
from unet import LightningSegmentationModel
import torch.nn.functional as F

class ActualSequentialSampler(Sampler):
	r"""Samples elements sequentially, always in the same order.

	Arguments:
		data_source (Dataset): dataset to sample from
	"""

	def __init__(self, data_source):
		self.data_source = data_source

	def __iter__(self):
		return iter(self.data_source)

	def __len__(self):
		return len(self.data_source)
     
class CLUESampling:
    """
    Implements CLUE: Clustering via Uncertainty-weighted Embeddings for segmentation tasks.
    """
    def __init__(self, dset, model, device, args, balanced=False):
        self.dset = dset
        self.model = model
        self.device = device
        self.args = args
        self.random_state = np.random.RandomState(1234)
        self.T = args.clue_softmax_t
    
    def get_embedding(self, model, loader, device, args, with_emb=False):
        model.eval()
        embedding_pen = None
        embedding = None
        emb_dim = None
        batch_sz = args.batch_size
        num_samples = len(self.dset)

        with torch.no_grad():
            for batch_idx, batch in enumerate(tqdm(loader)):
                data = batch['data'].to(device)

                if with_emb:
                    e1, e2 = model(data, with_emb=True)
                    height, width = e2.shape[2], e2.shape[3]
                # else:
                #     e1 = model(data, with_emb=False)

                # Приводим размер логитов к размеру эмбеддингов
                e1 = F.interpolate(e1, size=(height, width), mode='bilinear', align_corners=False)

                if embedding_pen is None:
                    emb_dim = e2.shape[1]
                    num_classes = e1.shape[1]
                    embedding_pen = torch.zeros((num_samples * height * width, emb_dim), device='cpu')
                    embedding = torch.zeros((num_samples * height * width, num_classes), device='cpu')

                # Преобразуем логиты и эмбеддинги для каждого пикселя
                e1 = e1.permute(0, 2, 3, 1).reshape(-1, num_classes)
                e2 = e2.permute(0, 2, 3, 1).reshape(-1, emb_dim)

                # Вычисляем текущие индексы
                start_idx = batch_idx * batch_sz * height * width
                end_idx = start_idx + min(batch_sz * height * width, e2.shape[0])

                # Заполняем тензоры
                embedding[start_idx:end_idx, :] = e1.cpu()
                embedding_pen[start_idx:end_idx, :] = e2.cpu()

                if batch_idx > 1:
                    break
        
        return embedding, embedding_pen
    
    def query(self, n, data_loader):
        self.model.eval()

        # Получаем эмбеддинги для пикселей
        tgt_emb, tgt_pen_emb = self.get_embedding(self.model, data_loader, self.device, self.args, with_emb=True)

        # Используем предпоследние эмбеддинги (tgt_pen_emb)
        tgt_pen_emb = tgt_pen_emb.cpu().numpy()

        # Вычисляем неопределенность через энтропию для каждого пикселя
        tgt_scores = nn.Softmax(dim=1)(tgt_emb / self.T)
        tgt_scores += 1e-8
        sample_weights = -(tgt_scores * torch.log(tgt_scores)).sum(1).cpu().numpy()

        # Запуск K-means с учетом весов неопределенности
        km = KMeans(n)
        km.fit(tgt_pen_emb, sample_weight=sample_weights)

        # Центроиды эмбединги
        return km.cluster_centers_

if __name__ == '__main__':
    mnmv2_config   = OmegaConf.load('../../configs/mnmv2.yaml')
    unet_config    = OmegaConf.load('../../configs/monai_unet.yaml')
    trainer_config = OmegaConf.load('../../configs/unet_trainer.yaml')

    # init datamodule
    datamodule = MNMv2DataModule(
        data_dir=mnmv2_config.data_dir,
        vendor_assignment=mnmv2_config.vendor_assignment,
        batch_size=mnmv2_config.batch_size,
        binary_target=mnmv2_config.binary_target,
        non_empty_target=mnmv2_config.non_empty_target,
    )

    datamodule.setup(stage='fit')

    # init model
    cfg = OmegaConf.create({
        'unet_config': unet_config,
        'binary_target': True if unet_config.out_channels == 1 else False,
        'lr': unet_config.lr,
        'patience': unet_config.patience,
        'lambda_centroids': 0.6,
        'dataset': OmegaConf.to_container(mnmv2_config),
        'unet': OmegaConf.to_container(unet_config),
        'trainer': OmegaConf.to_container(trainer_config),
    })

    checkpoint_path = '../../pre-trained/trained_UNets/mnmv2-10-12_06-11-2024.ckpt'

    load_as_lightning_module = True#False
    load_as_pytorch_module = False#True

    if load_as_lightning_module:
        unet_config    = OmegaConf.load('../../configs/monai_unet.yaml')
        unet = UNet(
            spatial_dims=unet_config.spatial_dims,
            in_channels=unet_config.in_channels,
            out_channels=unet_config.out_channels,
            channels=[unet_config.n_filters_init * 2 ** i for i in range(unet_config.depth)],
            strides=[2] * (unet_config.depth - 1),
            num_res_units=4
        )
        
        model = LightningSegmentationModel.load_from_checkpoint(
            checkpoint_path,
            map_location=torch.device("cpu"),
            model=unet,
            binary_target=True if unet_config.out_channels == 1 else False,
            lr=unet_config.lr,
            patience=unet_config.patience,
            cfg=cfg
        )

    elif load_as_pytorch_module:
        checkpoint = torch.load(checkpoint_path, map_location=torch.device("cpu"))
        model_state_dict = checkpoint['state_dict']
        model_state_dict = {k.replace('model.model.', 'model.'): v for k, v in model_state_dict.items() if k.startswith('model.')}
        model_config = checkpoint['hyper_parameters']['cfgs']

        print(model_config)

        unet = UNet(
            spatial_dims=model_config['unet']['spatial_dims'],
            in_channels=model_config['unet']['in_channels'],
            out_channels=model_config['unet']['out_channels'],
            channels=[model_config['unet']['n_filters_init'] * 2 ** i for i in range(model_config['unet']['depth'])],
            strides=[2] * (model_config['unet']['depth'] - 1),
            num_res_units=4
        )

        unet.load_state_dict(model_state_dict)
    
    train_loader = datamodule.train_dataloader()
    device = torch.device('cuda:2' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    clue_sampler = CLUESampling(dset=datamodule.mnm_train, 
                                model=model, 
                                device=device, 
                                args=unet_config, )
                                            #cache_path='../../MedImSeg-Lab24/checkpoints/emb_and_weights.pkl')
    
    centroids = clue_sampler.query(n=2, data_loader=train_loader)

    datamodule.setup(stage='test')
    test_loader = datamodule.test_dataloader()
    start_loss  = model.test_model(test_loader, device)
    # out_str = '{} | Test performance on {}->{}: Round 0 (B=0): {:.2f}'.format(method, source, target, start_perf)

    # Шаг дообучения модели с использованием центроидов
    model.finetune_model_on_centroids(centroids, train_loader, model)

    # Тестируем производительность модели после дообучения
    test_perf = model.test_model(test_loader, device)
    # out_str += '\t Round 1 (B={}): {:.2f}'.format(len(cluster_centers), test_perf)

    # Выводим результаты и возвращаем производительность до и после обучения
    print(start_loss['dsc'], test_perf['dsc'])



Please cite the following paper when using nnUNet:

Isensee, F., Jaeger, P.F., Kohl, S.A.A. et al. "nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation." Nat Methods (2020). https://doi.org/10.1038/s41592-020-01008-z


If you have questions or suggestions, feel free to open an issue at https://github.com/MIC-DKFZ/nnUNet

nnUNet_raw_data_base is not defined and nnU-Net can only be used on data for which preprocessed files are already present on your system. nnU-Net cannot be used for experiment planning and preprocessing like this. If this is not intended, please read documentation/setting_up_paths.md for information on how to set this up properly.
nnUNet_preprocessed is not defined and nnU-Net can not be used for preprocessing or training. If this is not intended, please read documentation/setting_up_paths.md for information on how to set this up.
RESULTS_FOLDER is not defined and nnU-Net cannot be used for training or inference. If this is not in

/home/mikhelson/MedImSeg-Lab24/CLUE/lib/python3.8/site-packages/lightning/fabric/utilities/cloud_io.py:57: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
2it [00:02,  1.24s/it]
  super()._che

Test Results - Average Loss: 0.5151, Average Dice Score: 0.7595
{'unet_config': {'n_filters_init': 16, 'depth': 4, 'spatial_dims': 2, 'in_channels': 1, 'out_channels': 4, 'num_res_units': 4, 'lr': 0.001, 'patience': 5, 'clue_softmax_t': 1.0, 'batch_size': 32}, 'binary_target': False, 'lr': 0.001, 'patience': 5, 'lambda_centroids': 0.6, 'dataset': {'data_dir': '/home/mikhelson/MedImSeg-Lab24/data/MNM/', 'vendor_assignment': {'train': 'siemens', 'test': 'ge'}, 'batch_size': 32, 'binary_target': False, 'non_empty_target': False}, 'unet': {'n_filters_init': 16, 'depth': 4, 'spatial_dims': 2, 'in_channels': 1, 'out_channels': 4, 'num_res_units': 4, 'lr': 0.001, 'patience': 5, 'clue_softmax_t': 1.0, 'batch_size': 32}, 'trainer': {'train_transforms': 'global_transforms', 'limit_train_batches': 50, 'max_epochs': 100, 'early_stopping': {'monitor': 'val_loss', 'mode': 'min'}, 'model_checkpoint': {'save_top_k': 2, 'dirpath': '../../pre-trained/trained_UNets', 'monitor': 'val_loss'}, 'logging': Tr

0it [00:00, ?it/s]

tensor(5.0549, device='cuda:2', grad_fn=<MeanBackward0>) tensor(5.5875, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3130, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.9047, device='cuda:2', grad_fn=<DivBackward0>)


3it [00:00,  3.84it/s]

tensor(4.8871, device='cuda:2', grad_fn=<MeanBackward0>) tensor(5.4904, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3447, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.8901, device='cuda:2', grad_fn=<DivBackward0>)
tensor(4.8338, device='cuda:2', grad_fn=<MeanBackward0>) tensor(5.4682, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3467, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.8840, device='cuda:2', grad_fn=<DivBackward0>)
tensor(4.6992, device='cuda:2', grad_fn=<MeanBackward0>) tensor(5.4043, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3433, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.8695, device='cuda:2', grad_fn=<DivBackward0>)


7it [00:01,  7.26it/s]

tensor(4.5665, device='cuda:2', grad_fn=<MeanBackward0>) tensor(5.3252, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3443, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.8575, device='cuda:2', grad_fn=<DivBackward0>)
tensor(4.4832, device='cuda:2', grad_fn=<MeanBackward0>) tensor(5.2907, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3405, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.8474, device='cuda:2', grad_fn=<DivBackward0>)
tensor(4.4308, device='cuda:2', grad_fn=<MeanBackward0>) tensor(5.2784, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3527, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.8394, device='cuda:2', grad_fn=<DivBackward0>)


9it [00:01,  8.27it/s]

tensor(4.3541, device='cuda:2', grad_fn=<MeanBackward0>) tensor(5.2541, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3240, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.8287, device='cuda:2', grad_fn=<DivBackward0>)
tensor(4.3007, device='cuda:2', grad_fn=<MeanBackward0>) tensor(5.2325, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3330, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.8219, device='cuda:2', grad_fn=<DivBackward0>)
tensor(4.2592, device='cuda:2', grad_fn=<MeanBackward0>) tensor(5.2279, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3723, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.8147, device='cuda:2', grad_fn=<DivBackward0>)


13it [00:01,  9.70it/s]

tensor(4.2391, device='cuda:2', grad_fn=<MeanBackward0>) tensor(5.2296, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3642, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.8106, device='cuda:2', grad_fn=<DivBackward0>)
tensor(4.1983, device='cuda:2', grad_fn=<MeanBackward0>) tensor(5.2228, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3714, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.8039, device='cuda:2', grad_fn=<DivBackward0>)
tensor(4.1552, device='cuda:2', grad_fn=<MeanBackward0>) tensor(5.2045, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3782, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.7984, device='cuda:2', grad_fn=<DivBackward0>)


15it [00:02, 10.11it/s]

tensor(4.0855, device='cuda:2', grad_fn=<MeanBackward0>) tensor(5.1434, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3115, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.7943, device='cuda:2', grad_fn=<DivBackward0>)
tensor(4.0266, device='cuda:2', grad_fn=<MeanBackward0>) tensor(5.1120, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2960, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.7877, device='cuda:2', grad_fn=<DivBackward0>)
tensor(3.9802, device='cuda:2', grad_fn=<MeanBackward0>) tensor(5.0964, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2983, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.7810, device='cuda:2', grad_fn=<DivBackward0>)


19it [00:02, 10.56it/s]

tensor(3.9382, device='cuda:2', grad_fn=<MeanBackward0>) tensor(5.0804, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2959, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.7752, device='cuda:2', grad_fn=<DivBackward0>)
tensor(3.8858, device='cuda:2', grad_fn=<MeanBackward0>) tensor(5.0959, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3740, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.7625, device='cuda:2', grad_fn=<DivBackward0>)
tensor(3.8684, device='cuda:2', grad_fn=<MeanBackward0>) tensor(5.0920, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3665, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.7597, device='cuda:2', grad_fn=<DivBackward0>)


21it [00:02, 10.72it/s]

tensor(3.8080, device='cuda:2', grad_fn=<MeanBackward0>) tensor(5.0734, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3637, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.7506, device='cuda:2', grad_fn=<DivBackward0>)
tensor(3.7596, device='cuda:2', grad_fn=<MeanBackward0>) tensor(5.0561, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3638, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.7436, device='cuda:2', grad_fn=<DivBackward0>)
tensor(3.7336, device='cuda:2', grad_fn=<MeanBackward0>) tensor(5.0379, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2662, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.7411, device='cuda:2', grad_fn=<DivBackward0>)


23it [00:02, 10.89it/s]

tensor(3.7058, device='cuda:2', grad_fn=<MeanBackward0>) tensor(5.0181, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2996, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.7385, device='cuda:2', grad_fn=<DivBackward0>)
tensor(3.6382, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.9793, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2560, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.7307, device='cuda:2', grad_fn=<DivBackward0>)
tensor(3.5919, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.9574, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2638, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.7246, device='cuda:2', grad_fn=<DivBackward0>)


27it [00:03, 11.00it/s]

tensor(3.5909, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.9782, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3776, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.7213, device='cuda:2', grad_fn=<DivBackward0>)
tensor(3.5716, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.9855, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3688, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.7164, device='cuda:2', grad_fn=<DivBackward0>)
tensor(3.5775, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.9911, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3656, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.7168, device='cuda:2', grad_fn=<DivBackward0>)


31it [00:03, 11.03it/s]

tensor(3.5438, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.9759, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3561, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.7122, device='cuda:2', grad_fn=<DivBackward0>)
tensor(3.5017, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.9235, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3520, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.7112, device='cuda:2', grad_fn=<DivBackward0>)
tensor(3.4746, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.9028, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3423, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.7087, device='cuda:2', grad_fn=<DivBackward0>)


33it [00:03, 11.04it/s]

tensor(3.4310, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.8826, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3317, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.7027, device='cuda:2', grad_fn=<DivBackward0>)
tensor(3.3862, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.8468, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3322, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.6986, device='cuda:2', grad_fn=<DivBackward0>)
tensor(3.3714, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.8582, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3349, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.6940, device='cuda:2', grad_fn=<DivBackward0>)


37it [00:04, 11.06it/s]

tensor(3.3218, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.8434, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3283, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.6858, device='cuda:2', grad_fn=<DivBackward0>)
tensor(3.3444, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.8465, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3378, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.6901, device='cuda:2', grad_fn=<DivBackward0>)
tensor(3.3167, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.8397, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3273, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.6853, device='cuda:2', grad_fn=<DivBackward0>)


39it [00:04, 11.05it/s]

tensor(3.2921, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.8140, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3095, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.6839, device='cuda:2', grad_fn=<DivBackward0>)
tensor(3.2775, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.7903, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3156, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.6842, device='cuda:2', grad_fn=<DivBackward0>)
tensor(3.2568, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.7630, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3058, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.6838, device='cuda:2', grad_fn=<DivBackward0>)


43it [00:04, 11.02it/s]

tensor(3.2158, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.7402, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3151, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.6784, device='cuda:2', grad_fn=<DivBackward0>)
tensor(3.2207, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.7728, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3795, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.6748, device='cuda:2', grad_fn=<DivBackward0>)
tensor(3.2013, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.7694, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3595, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.6712, device='cuda:2', grad_fn=<DivBackward0>)


45it [00:04, 10.96it/s]

tensor(3.1534, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.7600, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3527, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.6625, device='cuda:2', grad_fn=<DivBackward0>)
tensor(3.1523, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.7577, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3695, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.6626, device='cuda:2', grad_fn=<DivBackward0>)
tensor(3.0832, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.6918, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3573, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.6572, device='cuda:2', grad_fn=<DivBackward0>)


47it [00:04, 10.97it/s]

tensor(3.0515, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.6808, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3701, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.6519, device='cuda:2', grad_fn=<DivBackward0>)
tensor(3.0293, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.6650, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3560, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.6494, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.9978, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.6490, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3487, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.6448, device='cuda:2', grad_fn=<DivBackward0>)


51it [00:05, 10.97it/s]

tensor(2.9815, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.6192, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3301, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.6455, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.9952, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.6241, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3362, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.6477, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.9917, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.6148, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3234, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.6483, device='cuda:2', grad_fn=<DivBackward0>)


53it [00:05, 11.00it/s]

tensor(2.9555, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.6109, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3310, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.6410, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.9437, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.6215, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3332, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.6370, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.9410, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.6239, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3392, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.6360, device='cuda:2', grad_fn=<DivBackward0>)


57it [00:05, 10.97it/s]

tensor(2.9198, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.6225, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3380, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.6316, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.8543, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.5798, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3240, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.6232, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.8124, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.5398, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2968, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.6195, device='cuda:2', grad_fn=<DivBackward0>)


61it [00:06, 11.06it/s]

tensor(2.7914, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.5269, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3062, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.6166, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.7838, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.5268, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2929, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.6150, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.7422, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.4909, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3150, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.6106, device='cuda:2', grad_fn=<DivBackward0>)


63it [00:06, 11.05it/s]

tensor(2.6509, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.4373, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3123, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5974, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.6759, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.4632, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2952, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5995, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.6715, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.4659, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3081, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5982, device='cuda:2', grad_fn=<DivBackward0>)


67it [00:06, 10.98it/s]

tensor(2.6719, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.4595, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3110, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5991, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.7170, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.5042, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3746, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.6032, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.7021, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.4936, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3937, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.6013, device='cuda:2', grad_fn=<DivBackward0>)


69it [00:06, 11.02it/s]

tensor(2.6930, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.4797, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3719, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.6012, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.6797, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.4643, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3876, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.6003, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.6624, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.4624, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3235, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5966, device='cuda:2', grad_fn=<DivBackward0>)


73it [00:07, 11.00it/s]

tensor(2.6414, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.4570, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3332, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5926, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.5893, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.4194, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2984, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5859, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.5433, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.4055, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3123, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5773, device='cuda:2', grad_fn=<DivBackward0>)


75it [00:07, 10.96it/s]

tensor(2.5031, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.3743, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3234, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5722, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.4967, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.3647, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3277, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5720, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.4672, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.3550, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3253, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5665, device='cuda:2', grad_fn=<DivBackward0>)


77it [00:07, 11.04it/s]

tensor(2.4600, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.3382, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3363, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5671, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.4316, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.3452, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3003, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5596, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.4293, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.3377, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2995, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5600, device='cuda:2', grad_fn=<DivBackward0>)


81it [00:08, 11.02it/s]

tensor(2.4165, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.3297, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3084, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5581, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.4191, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.3309, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2886, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5586, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.4559, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.3112, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3576, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5697, device='cuda:2', grad_fn=<DivBackward0>)


85it [00:08, 11.05it/s]

tensor(2.4252, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.3210, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3130, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5613, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.4023, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.3049, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2975, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5580, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.3767, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.3037, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3029, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5522, device='cuda:2', grad_fn=<DivBackward0>)


87it [00:08, 10.95it/s]

tensor(2.3828, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.3258, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3086, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5508, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.3834, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.3125, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3007, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5527, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.3677, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.2982, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2969, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5509, device='cuda:2', grad_fn=<DivBackward0>)


89it [00:08, 11.04it/s]

tensor(2.3136, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.2725, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2822, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5415, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.2942, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.2340, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3361, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5418, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.3036, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.2497, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3469, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5421, device='cuda:2', grad_fn=<DivBackward0>)


93it [00:09, 10.98it/s]

tensor(2.2908, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.2311, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3298, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5414, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.2866, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.2367, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3225, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5397, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.2939, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.2533, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3471, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5393, device='cuda:2', grad_fn=<DivBackward0>)


97it [00:09, 11.01it/s]

tensor(2.2682, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.2336, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3255, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5358, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.2356, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.2216, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3216, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5296, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.2472, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.2251, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3202, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5319, device='cuda:2', grad_fn=<DivBackward0>)


99it [00:09, 10.99it/s]

tensor(2.2304, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.1897, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3513, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5324, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.2354, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.2151, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3460, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5303, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.2278, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.1989, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3401, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5306, device='cuda:2', grad_fn=<DivBackward0>)


103it [00:10, 11.08it/s]

tensor(2.2063, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.1815, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3344, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5276, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.1781, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.1707, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3371, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5222, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.1814, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.1757, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3355, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5224, device='cuda:2', grad_fn=<DivBackward0>)


105it [00:10, 11.02it/s]

tensor(2.1621, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.1523, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3498, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5207, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.1035, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.1148, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3321, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5112, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.0944, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.1119, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3492, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5094, device='cuda:2', grad_fn=<DivBackward0>)


109it [00:10, 11.03it/s]

tensor(2.0819, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.0998, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3554, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5078, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.0902, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.1019, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3371, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5096, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.0625, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.0675, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3378, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5071, device='cuda:2', grad_fn=<DivBackward0>)


111it [00:10, 10.81it/s]

tensor(2.0343, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.0360, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2712, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5040, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.0063, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.0311, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2810, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4977, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.0397, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.0228, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2741, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.5070, device='cuda:2', grad_fn=<DivBackward0>)


115it [00:11, 10.88it/s]

tensor(2.0067, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.0161, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2636, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4996, device='cuda:2', grad_fn=<DivBackward0>)
tensor(2.0105, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.0317, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3258, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4987, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.9683, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.0176, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3343, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4899, device='cuda:2', grad_fn=<DivBackward0>)


117it [00:11, 10.97it/s]

tensor(1.9865, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.0306, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3295, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4928, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.9704, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.0343, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3289, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4884, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.9840, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.0110, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3519, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4946, device='cuda:2', grad_fn=<DivBackward0>)


121it [00:11, 10.95it/s]

tensor(1.9669, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.0072, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3259, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4908, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.9458, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.0147, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3377, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4847, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.9486, device='cuda:2', grad_fn=<MeanBackward0>) tensor(4.0113, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3190, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4858, device='cuda:2', grad_fn=<DivBackward0>)


123it [00:11, 10.87it/s]

tensor(1.8819, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.9887, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3585, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4718, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.8720, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.9604, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2870, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4727, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.8663, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.9560, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2973, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4718, device='cuda:2', grad_fn=<DivBackward0>)


125it [00:12, 10.96it/s]

tensor(1.8778, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.9460, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2964, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4759, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.8908, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.9612, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3787, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4773, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.9041, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.9589, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3293, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4810, device='cuda:2', grad_fn=<DivBackward0>)


129it [00:12, 10.66it/s]

tensor(1.9322, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.9790, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3616, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4856, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.9319, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.9716, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3434, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4864, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.8520, device='cuda:2', grad_fn=<MeanBackward0>) 

131it [00:12, 10.68it/s]

tensor(3.9208, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2829, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4723, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.8510, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.9159, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2823, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4727, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.8247, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.9046, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2782, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4673, device='cuda:2', grad_fn=<DivBackward0>)


133it [00:12, 10.78it/s]

tensor(1.7967, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.9024, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2742, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4604, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.7979, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.9041, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2964, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4605, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.7943, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.9017, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2958, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4599, device='cuda:2', grad_fn=<DivBackward0>)


137it [00:13, 10.91it/s]

tensor(1.7967, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.9020, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3072, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4605, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.7914, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.9049, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2806, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4587, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.8465, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.9157, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3900, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4716, device='cuda:2', grad_fn=<DivBackward0>)


139it [00:13, 10.92it/s]

tensor(1.8364, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.9171, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3880, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4688, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.8601, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.9204, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3832, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4745, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.8403, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.9113, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3581, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4705, device='cuda:2', grad_fn=<DivBackward0>)


143it [00:13, 10.75it/s]

tensor(1.7645, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.8770, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2631, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4551, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.7964, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.8929, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2965, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4614, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.7861, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.8805, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2814, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4603, device='cuda:2', grad_fn=<DivBackward0>)


145it [00:13, 10.86it/s]

tensor(1.7542, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.8570, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2684, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4548, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.7446, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.8678, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3317, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4511, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.7341, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.8736, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3307, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4477, device='cuda:2', grad_fn=<DivBackward0>)


149it [00:14, 10.89it/s]

tensor(1.7274, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.8706, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3462, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4463, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.7207, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.8601, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3186, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4458, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.7604, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.8657, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3422, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4554, device='cuda:2', grad_fn=<DivBackward0>)


153it [00:14, 10.95it/s]

tensor(1.6950, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.8291, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3037, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4427, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.7522, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.8398, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2838, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4563, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.7323, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.8277, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2957, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4526, device='cuda:2', grad_fn=<DivBackward0>)


155it [00:14, 10.89it/s]

tensor(1.7384, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.8469, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3709, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4519, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.7185, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.8405, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3620, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4475, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.7011, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.8386, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3539, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4431, device='cuda:2', grad_fn=<DivBackward0>)


157it [00:14, 10.90it/s]

tensor(1.7165, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.8560, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3535, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4451, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.7331, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.8808, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4628, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4466, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.7587, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.8863, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4274, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4526, device='cuda:2', grad_fn=<DivBackward0>)


161it [00:15, 11.00it/s]

tensor(1.7241, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.8732, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4169, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4452, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.7229, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.8669, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4012, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4456, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.6601, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.8211, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4014, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4344, device='cuda:2', grad_fn=<DivBackward0>)


163it [00:15, 10.94it/s]

tensor(1.6840, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.8368, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3970, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4389, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.6866, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.8222, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3847, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4413, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.6930, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.8179, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3887, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4434, device='cuda:2', grad_fn=<DivBackward0>)


167it [00:15, 10.89it/s]

tensor(1.6599, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.8091, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3543, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4358, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.6417, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.8013, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3424, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4319, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.6421, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.8071, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3460, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4313, device='cuda:2', grad_fn=<DivBackward0>)


169it [00:16, 10.94it/s]

tensor(1.6366, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.8023, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3330, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4304, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.6426, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.8297, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3165, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4289, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.6462, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.8189, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3349, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4311, device='cuda:2', grad_fn=<DivBackward0>)


173it [00:16, 10.99it/s]

tensor(1.6250, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.8121, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2980, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4263, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.6233, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.8041, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3232, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4267, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.5758, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.7764, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3117, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4173, device='cuda:2', grad_fn=<DivBackward0>)


175it [00:16, 10.82it/s]

tensor(1.6044, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.7946, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2951, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4228, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.5663, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.7738, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2822, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4151, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.5818, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.7769, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2883, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4188, device='cuda:2', grad_fn=<DivBackward0>)


179it [00:16, 10.96it/s]

tensor(1.5804, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.7573, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3649, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4206, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.5708, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.7578, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3566, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4180, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.5918, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.7818, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3570, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4209, device='cuda:2', grad_fn=<DivBackward0>)


181it [00:17, 10.95it/s]

tensor(1.5513, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.7396, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3396, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4148, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.5270, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.7444, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2836, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4078, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.5441, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.7495, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2854, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4118, device='cuda:2', grad_fn=<DivBackward0>)


185it [00:17, 10.94it/s]

tensor(1.5346, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.7449, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2789, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4098, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.5211, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.7437, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2893, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4063, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.5811, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.7766, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4222, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4187, device='cuda:2', grad_fn=<DivBackward0>)


187it [00:17, 10.98it/s]

tensor(1.5550, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.7574, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4091, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4139, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.5587, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.7513, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4534, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4155, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.5713, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.7645, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4004, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4174, device='cuda:2', grad_fn=<DivBackward0>)


191it [00:18, 10.97it/s]

tensor(1.5771, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.7626, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3285, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4191, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.5287, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.7360, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3101, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4092, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.5180, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.7476, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3088, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4051, device='cuda:2', grad_fn=<DivBackward0>)


193it [00:18, 10.95it/s]

tensor(1.5438, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.7490, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3267, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4118, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.4563, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6883, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2664, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3948, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.4771, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6912, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2865, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4002, device='cuda:2', grad_fn=<DivBackward0>)


197it [00:18, 10.98it/s]

tensor(1.4776, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6925, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2587, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4002, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.4837, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6906, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2504, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.4020, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.4762, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6930, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3455, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3997, device='cuda:2', grad_fn=<DivBackward0>)


199it [00:18, 10.93it/s]

tensor(1.4715, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6907, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3295, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3987, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.4720, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6974, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3402, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3981, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.4452, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6755, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3318, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3932, device='cuda:2', grad_fn=<DivBackward0>)


203it [00:19, 10.70it/s]

tensor(1.4569, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.7005, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3630, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3937, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.4494, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.7037, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3660, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3913, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.4519, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6962, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3710, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3928, device='cuda:2', grad_fn=<DivBackward0>)


207it [00:19, 10.84it/s]

tensor(1.4357, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.7088, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3685, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3871, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.4267, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6983, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3623, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3858, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.4254, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6956, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3439, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3857, device='cuda:2', grad_fn=<DivBackward0>)


209it [00:19, 10.82it/s]

tensor(1.4244, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6993, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3573, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3851, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.4193, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6992, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3596, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3837, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.4473, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.7205, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4280, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3890, device='cuda:2', grad_fn=<DivBackward0>)


211it [00:19, 10.92it/s]

tensor(1.4557, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.7333, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4358, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3899, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.4457, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.7282, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4300, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3878, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.4592, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.7253, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4000, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3917, device='cuda:2', grad_fn=<DivBackward0>)


215it [00:20, 10.95it/s]

tensor(1.4055, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6692, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3419, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3830, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.4066, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6690, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3544, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3834, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.4169, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6731, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3345, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3857, device='cuda:2', grad_fn=<DivBackward0>)


217it [00:20, 10.92it/s]

tensor(1.4602, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6795, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3518, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3969, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.4825, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.7148, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3220, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3991, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.4704, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.7111, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2961, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3962, device='cuda:2', grad_fn=<DivBackward0>)


221it [00:20, 10.86it/s]

tensor(1.4734, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.7171, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3149, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3964, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.4617, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.7060, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2875, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3944, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.4258, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6801, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3527, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3874, device='cuda:2', grad_fn=<DivBackward0>)


223it [00:21, 10.85it/s]

tensor(1.4091, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6771, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3572, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3832, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.4168, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6686, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3409, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3862, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.4314, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6844, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3480, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3885, device='cuda:2', grad_fn=<DivBackward0>)


227it [00:21, 10.87it/s]

tensor(1.3726, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6442, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3140, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3766, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.4042, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6522, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3200, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3845, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.3678, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6291, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3141, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3769, device='cuda:2', grad_fn=<DivBackward0>)


229it [00:21, 10.86it/s]

tensor(1.3545, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6310, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3255, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3730, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.3681, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6668, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3922, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3731, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.3480, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6441, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3842, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3699, device='cuda:2', grad_fn=<DivBackward0>)


233it [00:21, 10.94it/s]

tensor(1.3367, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6405, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3936, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3672, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.3550, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6366, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3867, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3726, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.3902, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6507, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4046, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3808, device='cuda:2', grad_fn=<DivBackward0>)


237it [00:22, 10.95it/s]

tensor(1.3544, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6381, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3738, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3723, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.3868, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6474, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3876, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3802, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.3529, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6210, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3773, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3736, device='cuda:2', grad_fn=<DivBackward0>)


239it [00:22, 10.91it/s]

tensor(1.3991, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6456, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3059, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3838, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.4285, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6551, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3137, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3908, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.3741, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6246, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3077, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3791, device='cuda:2', grad_fn=<DivBackward0>)


241it [00:22, 10.88it/s]

tensor(1.3810, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6379, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3035, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3796, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.3237, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6116, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3343, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3665, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.3387, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6088, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3287, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3710, device='cuda:2', grad_fn=<DivBackward0>)


245it [00:23, 10.93it/s]

tensor(1.3065, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5930, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3157, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3636, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.3123, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5908, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3291, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3655, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.3166, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5751, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2973, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3683, device='cuda:2', grad_fn=<DivBackward0>)


247it [00:23, 10.71it/s]

tensor(1.3204, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6047, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2879, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3663, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.3195, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6104, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2783, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3655, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.3115, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6057, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2676, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3637, device='cuda:2', grad_fn=<DivBackward0>)


251it [00:23, 10.76it/s]

tensor(1.2905, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5956, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4021, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3589, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.2946, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6226, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4117, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3574, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.3029, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6193, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3951, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3600, device='cuda:2', grad_fn=<DivBackward0>)


255it [00:23, 10.90it/s]

tensor(1.3084, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6192, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3890, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3615, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.3271, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6150, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3525, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3671, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.3040, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6205, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3376, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3602, device='cuda:2', grad_fn=<DivBackward0>)


257it [00:24, 10.86it/s]

tensor(1.3051, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6192, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3290, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3606, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.3080, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6118, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3275, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3621, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.2972, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6007, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3728, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3603, device='cuda:2', grad_fn=<DivBackward0>)


259it [00:24, 10.89it/s]

tensor(1.3089, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.6118, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3946, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3624, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.2822, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5864, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3508, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3575, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.2852, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5973, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3650, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3573, device='cuda:2', grad_fn=<DivBackward0>)


263it [00:24, 10.80it/s]

tensor(1.2784, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5929, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3586, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3558, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.2784, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5871, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3500, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3564, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.2507, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5665, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3573, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3507, device='cuda:2', grad_fn=<DivBackward0>)


265it [00:24, 10.87it/s]

tensor(1.2592, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5737, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3437, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3524, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.2665, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5734, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3643, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3544, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.2869, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5788, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3565, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3596, device='cuda:2', grad_fn=<DivBackward0>)


269it [00:25, 10.92it/s]

tensor(1.3174, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5985, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3907, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3661, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.2687, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5573, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3536, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3566, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.2819, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5962, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4144, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3565, device='cuda:2', grad_fn=<DivBackward0>)


273it [00:25, 10.97it/s]

tensor(1.2502, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5790, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4264, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3493, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.2780, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5875, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4152, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3562, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.2785, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5764, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4333, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3575, device='cuda:2', grad_fn=<DivBackward0>)


275it [00:25, 10.88it/s]

tensor(1.2484, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5733, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3830, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3494, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.2612, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5928, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3901, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3510, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.2428, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5728, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3788, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3478, device='cuda:2', grad_fn=<DivBackward0>)


277it [00:25, 10.89it/s]

tensor(1.2590, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5871, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3614, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3510, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.2470, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5717, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4307, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3491, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.2484, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5774, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4189, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3490, device='cuda:2', grad_fn=<DivBackward0>)


281it [00:26, 10.98it/s]

tensor(1.2589, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5625, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4201, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3534, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.2358, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5644, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4284, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3467, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.2316, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5627, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3418, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3457, device='cuda:2', grad_fn=<DivBackward0>)


283it [00:26, 10.83it/s]

tensor(1.2582, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5708, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3334, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3524, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.2445, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5733, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3397, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3483, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.2696, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5822, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3552, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3544, device='cuda:2', grad_fn=<DivBackward0>)


287it [00:26, 10.95it/s]

tensor(1.2379, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5797, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3682, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3458, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.2477, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5768, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3777, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3488, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.2256, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5616, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3604, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3441, device='cuda:2', grad_fn=<DivBackward0>)


289it [00:27, 10.95it/s]

tensor(1.2224, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5481, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3599, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3445, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1997, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5250, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4225, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3403, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.2174, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5301, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4192, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3449, device='cuda:2', grad_fn=<DivBackward0>)


293it [00:27, 10.89it/s]

tensor(1.2209, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5277, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4226, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3461, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.2087, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5179, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4210, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3436, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.2382, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5189, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2618, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3519, device='cuda:2', grad_fn=<DivBackward0>)


297it [00:27, 10.97it/s]

tensor(1.2250, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5227, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2534, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3477, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.2083, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5130, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2415, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3440, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.2302, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5375, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2401, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3477, device='cuda:2', grad_fn=<DivBackward0>)


299it [00:28, 10.92it/s]

tensor(1.2326, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5540, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.5085, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3468, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.2081, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5509, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4473, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3402, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.2104, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5541, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4432, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3406, device='cuda:2', grad_fn=<DivBackward0>)


301it [00:28, 10.92it/s]

tensor(1.1965, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5517, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4514, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3369, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.2448, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5698, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2700, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3487, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.2439, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5711, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2615, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3483, device='cuda:2', grad_fn=<DivBackward0>)


305it [00:28, 10.91it/s]

tensor(1.2336, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5720, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2629, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3454, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.2595, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5796, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2684, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3519, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.2581, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5670, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3162, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3527, device='cuda:2', grad_fn=<DivBackward0>)


307it [00:28, 10.93it/s]

tensor(1.2176, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5619, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3108, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3418, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.2601, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5696, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3124, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3530, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.2222, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5575, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3032, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3436, device='cuda:2', grad_fn=<DivBackward0>)


311it [00:29, 10.90it/s]

tensor(1.2061, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5421, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4144, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3405, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.2249, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5396, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4270, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3460, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1734, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5129, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3959, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3340, device='cuda:2', grad_fn=<DivBackward0>)


313it [00:29, 10.97it/s]

tensor(1.1766, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5213, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4032, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3341, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1642, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5155, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2934, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3312, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1613, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4953, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2662, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3322, device='cuda:2', grad_fn=<DivBackward0>)


317it [00:29, 10.89it/s]

tensor(1.1632, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5117, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2603, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3312, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1725, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5118, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2643, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3339, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1935, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5366, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4073, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3375, device='cuda:2', grad_fn=<DivBackward0>)


319it [00:29, 10.79it/s]

tensor(1.1977, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5333, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4184, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3390, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.2052, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5296, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4283, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3415, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.2094, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5210, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4656, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3435, device='cuda:2', grad_fn=<DivBackward0>)


323it [00:30, 10.88it/s]

tensor(1.1708, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5084, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3099, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3337, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1804, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5065, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3303, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3366, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.2005, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5257, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3092, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3405, device='cuda:2', grad_fn=<DivBackward0>)


325it [00:30, 10.89it/s]

tensor(1.1890, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4977, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3133, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3399, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1718, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5068, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3778, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3341, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1460, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4963, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3674, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3278, device='cuda:2', grad_fn=<DivBackward0>)


329it [00:30, 10.85it/s]

tensor(1.1549, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4987, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3932, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3301, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1739, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5101, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3651, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3344, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1794, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5192, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3569, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3351, device='cuda:2', grad_fn=<DivBackward0>)


331it [00:30, 10.86it/s]

tensor(1.1885, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5316, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3690, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3365, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1597, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5290, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3562, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3286, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1636, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5303, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3616, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3296, device='cuda:2', grad_fn=<DivBackward0>)


335it [00:31, 10.84it/s]

tensor(1.2201, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5594, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3847, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3428, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.2029, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5581, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3747, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3381, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1846, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5411, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3701, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3345, device='cuda:2', grad_fn=<DivBackward0>)


339it [00:31, 10.94it/s]

tensor(1.1731, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5308, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3705, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3322, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1496, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5189, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3091, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3267, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1284, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5019, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2937, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3222, device='cuda:2', grad_fn=<DivBackward0>)


341it [00:31, 10.38it/s]

tensor(1.1426, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4856, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3047, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3278, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1683, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5098, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3325, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3329, device='cuda:2', grad_fn=<DivBackward0>)


343it [00:32, 10.53it/s]

tensor(1.1268, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4831, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2771, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3235, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1447, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4824, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2702, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3287, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1564, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4926, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2698, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3311, device='cuda:2', grad_fn=<DivBackward0>)


347it [00:32, 10.76it/s]

tensor(1.1581, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4944, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2653, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3314, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1409, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4810, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4382, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3277, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1461, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4794, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4137, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3294, device='cuda:2', grad_fn=<DivBackward0>)


349it [00:32, 10.80it/s]

tensor(1.1450, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4802, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4102, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3290, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1458, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4875, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3986, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3285, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1324, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5007, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3574, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3235, device='cuda:2', grad_fn=<DivBackward0>)


351it [00:32, 10.81it/s]

tensor(1.1594, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5243, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3397, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3290, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1521, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5332, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3352, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3261, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1651, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5394, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3338, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3292, device='cuda:2', grad_fn=<DivBackward0>)


355it [00:33, 10.77it/s]

tensor(1.1965, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5666, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3429, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3355, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1896, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5479, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3289, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3353, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1691, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5433, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3208, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3299, device='cuda:2', grad_fn=<DivBackward0>)


357it [00:33, 10.85it/s]

tensor(1.1545, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5294, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3047, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3271, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1951, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5290, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3859, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3387, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.2192, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5256, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3885, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3458, device='cuda:2', grad_fn=<DivBackward0>)


361it [00:33, 10.90it/s]

tensor(1.1969, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5220, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3616, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3398, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1999, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5068, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3787, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3422, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1317, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4914, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2903, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3241, device='cuda:2', grad_fn=<DivBackward0>)


363it [00:33, 10.82it/s]

tensor(1.1330, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4751, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2891, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3260, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1362, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4741, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2969, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3271, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1205, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4664, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2866, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3233, device='cuda:2', grad_fn=<DivBackward0>)


367it [00:34, 10.86it/s]

tensor(1.1453, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4788, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3011, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3292, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1287, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4758, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2830, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3247, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1330, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4921, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2908, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3245, device='cuda:2', grad_fn=<DivBackward0>)


369it [00:34, 10.87it/s]

tensor(1.1453, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5039, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2958, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3269, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1945, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5450, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3465, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3370, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1922, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5469, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3516, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3361, device='cuda:2', grad_fn=<DivBackward0>)


373it [00:34, 10.86it/s]

tensor(1.2040, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5575, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3477, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3384, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1915, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5443, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3396, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3362, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1715, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5318, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4425, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3317, device='cuda:2', grad_fn=<DivBackward0>)


375it [00:35, 10.83it/s]

tensor(1.1475, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5201, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4314, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3260, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1643, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5108, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4274, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3316, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1397, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4907, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4234, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3265, device='cuda:2', grad_fn=<DivBackward0>)


379it [00:35, 10.86it/s]

tensor(1.1636, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4973, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3654, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3327, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1664, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5104, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3486, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3323, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1698, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5040, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3248, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3338, device='cuda:2', grad_fn=<DivBackward0>)


381it [00:35, 10.84it/s]

tensor(1.1727, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5156, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3317, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3336, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1442, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5040, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3628, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3265, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1164, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4930, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3784, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3196, device='cuda:2', grad_fn=<DivBackward0>)


385it [00:35, 10.85it/s]

tensor(1.1366, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4985, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3596, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3249, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1065, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4892, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3422, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3171, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1288, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4875, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2657, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3237, device='cuda:2', grad_fn=<DivBackward0>)


387it [00:36, 10.57it/s]

tensor(1.1229, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4810, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2444, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3226, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1168, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4743, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2466, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3214, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1189, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4838, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2417, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3212, device='cuda:2', grad_fn=<DivBackward0>)


391it [00:36, 10.63it/s]

tensor(1.1166, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4790, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3064, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3209, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1094, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4672, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3114, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3200, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1009, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4683, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2945, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3174, device='cuda:2', grad_fn=<DivBackward0>)


393it [00:36, 10.69it/s]

tensor(1.0857, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4639, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3027, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3134, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1159, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4729, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3964, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3213, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0774, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4601, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3960, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3114, device='cuda:2', grad_fn=<DivBackward0>)


397it [00:37, 10.75it/s]

tensor(1.0743, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4485, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3769, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3115, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0735, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4555, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3937, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3107, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0799, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4707, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4523, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3112, device='cuda:2', grad_fn=<DivBackward0>)


399it [00:37, 10.77it/s]

tensor(1.1114, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4857, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4387, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3188, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1447, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5137, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4558, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3258, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1135, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4981, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4342, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3183, device='cuda:2', grad_fn=<DivBackward0>)


403it [00:37, 10.82it/s]

tensor(1.1314, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5111, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3205, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3222, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1171, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4999, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3376, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3192, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0941, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4759, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3282, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3148, device='cuda:2', grad_fn=<DivBackward0>)


405it [00:37, 10.83it/s]

tensor(1.1123, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4899, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3009, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3187, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1083, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4667, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3468, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3197, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0912, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4700, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3532, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3145, device='cuda:2', grad_fn=<DivBackward0>)


409it [00:38, 10.84it/s]

tensor(1.0863, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4530, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3412, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3146, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0925, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4548, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3464, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3162, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0856, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4579, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3502, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3140, device='cuda:2', grad_fn=<DivBackward0>)


411it [00:38, 10.86it/s]

tensor(1.0962, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4611, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2977, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3167, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1108, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4719, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3062, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3199, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1027, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4860, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3016, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3163, device='cuda:2', grad_fn=<DivBackward0>)


415it [00:38, 10.86it/s]

tensor(1.1026, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4874, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3418, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3162, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1042, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4694, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3473, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3183, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1056, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4879, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3350, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3170, device='cuda:2', grad_fn=<DivBackward0>)


417it [00:38, 10.83it/s]

tensor(1.1147, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4913, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3416, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3193, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0810, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4863, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3427, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3101, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0842, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4821, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3292, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3114, device='cuda:2', grad_fn=<DivBackward0>)


421it [00:39, 10.76it/s]

tensor(1.0763, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4774, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3119, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3095, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0613, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4650, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3134, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3063, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0954, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4649, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3438, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3162, device='cuda:2', grad_fn=<DivBackward0>)


423it [00:39, 10.79it/s]

tensor(1.0805, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4575, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3324, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3125, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0866, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4528, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3393, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3147, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0808, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4507, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3365, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3132, device='cuda:2', grad_fn=<DivBackward0>)


427it [00:39, 10.78it/s]

tensor(1.0418, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4437, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4338, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3025, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0393, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4527, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4198, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3010, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0435, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4504, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4106, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3024, device='cuda:2', grad_fn=<DivBackward0>)


429it [00:40, 10.83it/s]

tensor(1.0522, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4590, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4057, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3042, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1215, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4991, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2703, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3205, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1206, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5091, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2762, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3193, device='cuda:2', grad_fn=<DivBackward0>)


433it [00:40, 10.82it/s]

tensor(1.1077, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5008, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2918, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3164, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1189, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5071, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2539, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3190, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0506, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4629, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2599, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3034, device='cuda:2', grad_fn=<DivBackward0>)


435it [00:40, 10.83it/s]

tensor(1.0343, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4541, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2588, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2994, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0508, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4517, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2502, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3044, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0556, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4450, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2543, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3064, device='cuda:2', grad_fn=<DivBackward0>)


439it [00:40, 10.82it/s]

tensor(1.0468, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4317, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4025, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3050, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0692, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4351, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3972, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3113, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0522, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4217, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4046, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3075, device='cuda:2', grad_fn=<DivBackward0>)


441it [00:41, 10.77it/s]

tensor(1.0531, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4166, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3917, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3082, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1065, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4620, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3648, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3196, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0828, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4579, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3292, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3132, device='cuda:2', grad_fn=<DivBackward0>)


445it [00:41, 10.81it/s]

tensor(1.0812, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4525, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3136, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3132, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0910, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4618, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3242, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3152, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0840, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4583, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3575, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3135, device='cuda:2', grad_fn=<DivBackward0>)


447it [00:41, 10.81it/s]

tensor(1.0803, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4535, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3560, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3128, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0901, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4613, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3700, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3149, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0787, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4545, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3581, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3123, device='cuda:2', grad_fn=<DivBackward0>)


451it [00:42, 10.81it/s]

tensor(1.0929, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4605, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2913, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3158, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1028, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4664, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2819, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3181, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0962, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4690, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2928, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3160, device='cuda:2', grad_fn=<DivBackward0>)


453it [00:42, 10.77it/s]

tensor(1.0940, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4597, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2805, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3162, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0505, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4463, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2605, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3048, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0388, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4421, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2568, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3018, device='cuda:2', grad_fn=<DivBackward0>)


457it [00:42, 10.79it/s]

tensor(1.0705, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4570, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2600, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3097, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0589, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4544, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2632, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3065, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0697, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4492, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3543, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3101, device='cuda:2', grad_fn=<DivBackward0>)


459it [00:42, 10.84it/s]

tensor(1.0526, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4424, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3544, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3058, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0628, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4552, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3602, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3076, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0539, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4465, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3498, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3058, device='cuda:2', grad_fn=<DivBackward0>)


463it [00:43, 10.83it/s]

tensor(1.0968, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4909, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3141, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3142, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.1144, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5037, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3075, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3181, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0769, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4778, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3125, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3096, device='cuda:2', grad_fn=<DivBackward0>)


465it [00:43, 10.81it/s]

tensor(1.1310, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.5066, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2957, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3225, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0680, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4667, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3234, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3081, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0814, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4645, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3385, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3121, device='cuda:2', grad_fn=<DivBackward0>)


469it [00:43, 10.80it/s]

tensor(1.0680, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4675, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3281, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3080, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0678, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4466, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3120, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3098, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0389, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4239, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3447, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3034, device='cuda:2', grad_fn=<DivBackward0>)


471it [00:43, 10.78it/s]

tensor(1.0310, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4311, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3500, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3005, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0169, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4191, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3532, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2974, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0401, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4275, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3492, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3035, device='cuda:2', grad_fn=<DivBackward0>)


475it [00:44, 10.79it/s]

tensor(1.0460, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4296, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3534, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3050, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0066, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4128, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3383, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2949, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0199, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4241, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3331, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2979, device='cuda:2', grad_fn=<DivBackward0>)


477it [00:44, 10.76it/s]

tensor(1.0265, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4172, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3133, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3004, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0227, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4390, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3024, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2974, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0231, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4316, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3003, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2981, device='cuda:2', grad_fn=<DivBackward0>)


481it [00:44, 10.87it/s]

tensor(1.0271, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4331, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2967, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2992, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0130, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4296, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2978, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2954, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0412, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4361, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3338, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3030, device='cuda:2', grad_fn=<DivBackward0>)


483it [00:45, 10.82it/s]

tensor(1.0294, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4173, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3325, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3012, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0451, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4249, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3309, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3051, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0291, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4185, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3367, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3010, device='cuda:2', grad_fn=<DivBackward0>)


487it [00:45, 10.81it/s]

tensor(1.0819, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4544, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2976, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3132, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0667, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4435, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2795, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3098, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0745, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4498, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2744, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3115, device='cuda:2', grad_fn=<DivBackward0>)


489it [00:45, 10.79it/s]

tensor(1.0762, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4536, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2798, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3116, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0522, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4535, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4333, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3047, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0440, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4516, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4318, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3025, device='cuda:2', grad_fn=<DivBackward0>)


493it [00:45, 10.81it/s]

tensor(1.0729, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4505, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4311, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3109, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0342, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4350, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4199, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3011, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0946, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4546, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2183, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3169, device='cuda:2', grad_fn=<DivBackward0>)


495it [00:46, 10.77it/s]

tensor(1.0947, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4581, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2377, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3166, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0810, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4489, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2105, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3134, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0839, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4618, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2101, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3131, device='cuda:2', grad_fn=<DivBackward0>)


499it [00:46, 10.85it/s]

tensor(1.0055, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4246, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3838, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2936, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0025, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4197, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3846, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2931, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0060, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4253, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3837, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2937, device='cuda:2', grad_fn=<DivBackward0>)


501it [00:46, 10.81it/s]

tensor(1.0066, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4207, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3869, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2943, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0363, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4250, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3493, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3026, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0540, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4342, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3474, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3069, device='cuda:2', grad_fn=<DivBackward0>)


505it [00:47, 10.81it/s]

tensor(1.0463, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4235, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3448, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3056, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0541, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4349, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3503, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3069, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9899, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4136, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3617, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2900, device='cuda:2', grad_fn=<DivBackward0>)


507it [00:47, 10.75it/s]

tensor(0.9919, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4025, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3566, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2915, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9825, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3995, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3503, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2890, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9746, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3987, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3621, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2867, device='cuda:2', grad_fn=<DivBackward0>)


511it [00:47, 10.77it/s]

tensor(1.0092, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4156, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3605, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2955, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0171, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4177, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3484, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2976, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0235, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4334, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3682, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2981, device='cuda:2', grad_fn=<DivBackward0>)


513it [00:47, 10.71it/s]

tensor(1.0296, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4354, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3617, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2997, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0508, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4513, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3429, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3045, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0438, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4589, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3189, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3018, device='cuda:2', grad_fn=<DivBackward0>)


517it [00:48, 10.78it/s]

tensor(1.0362, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4576, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3136, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2997, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0410, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4544, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3182, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3014, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9999, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4312, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3737, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2914, device='cuda:2', grad_fn=<DivBackward0>)


519it [00:48, 10.79it/s]

tensor(1.0221, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4262, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3547, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2983, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9903, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4186, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3617, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2897, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0026, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4237, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3728, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2929, device='cuda:2', grad_fn=<DivBackward0>)


523it [00:48, 10.78it/s]

tensor(1.0200, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4212, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3678, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2981, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0175, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4136, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3656, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2981, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0273, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4159, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3609, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3008, device='cuda:2', grad_fn=<DivBackward0>)


525it [00:48, 10.77it/s]

tensor(0.9905, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4022, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3575, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2911, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0053, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4126, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3114, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2946, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0107, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4144, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3121, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2960, device='cuda:2', grad_fn=<DivBackward0>)


529it [00:49, 10.80it/s]

tensor(1.0214, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4191, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2941, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2987, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0047, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4079, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3086, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2948, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9950, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4101, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2792, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2918, device='cuda:2', grad_fn=<DivBackward0>)


531it [00:49, 10.76it/s]

tensor(1.0093, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4122, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2684, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2958, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9958, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4150, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2550, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2916, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9836, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4150, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2562, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2880, device='cuda:2', grad_fn=<DivBackward0>)


535it [00:49, 10.80it/s]

tensor(1.0090, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4315, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3889, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2940, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0247, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4379, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3838, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2981, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0387, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4505, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3824, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3010, device='cuda:2', grad_fn=<DivBackward0>)


537it [00:50, 10.81it/s]

tensor(1.0126, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4386, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3674, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2945, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0144, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4355, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2750, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2953, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0512, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4542, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2830, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3043, device='cuda:2', grad_fn=<DivBackward0>)


541it [00:50, 10.84it/s]

tensor(1.0350, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4431, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2581, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3006, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0192, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4361, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2894, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2966, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0092, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4226, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3721, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2948, device='cuda:2', grad_fn=<DivBackward0>)


543it [00:50, 10.84it/s]

tensor(1.0181, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4203, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3749, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2976, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0064, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4106, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3837, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2951, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0046, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4267, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3739, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2932, device='cuda:2', grad_fn=<DivBackward0>)


547it [00:50, 10.74it/s]

tensor(0.9589, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4093, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3106, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2813, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9719, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4023, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3053, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2857, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9755, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4087, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3278, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2862, device='cuda:2', grad_fn=<DivBackward0>)


549it [00:51, 10.73it/s]

tensor(0.9586, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3886, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3034, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2829, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9898, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3928, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2733, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2917, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9793, device='cuda:2', grad_fn=<MeanBackward0>) 

551it [00:51, 10.50it/s]

tensor(3.3912, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2692, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2888, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9962, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3964, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2688, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2933, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9864, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3972, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2957, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2904, device='cuda:2', grad_fn=<DivBackward0>)


555it [00:51, 10.67it/s]

tensor(0.9573, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3930, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3163, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2821, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9812, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4091, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2987, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2878, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0093, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4242, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3293, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2948, device='cuda:2', grad_fn=<DivBackward0>)


557it [00:51, 10.66it/s]

tensor(0.9932, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4137, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2967, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2909, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9774, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4124, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3809, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2864, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9870, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4071, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3594, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2897, device='cuda:2', grad_fn=<DivBackward0>)


561it [00:52, 10.63it/s]

tensor(0.9845, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4036, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3572, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2892, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9748, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4023, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3521, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2865, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0153, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4308, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4038, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2959, device='cuda:2', grad_fn=<DivBackward0>)


563it [00:52, 10.60it/s]

tensor(1.0238, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4441, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4039, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2972, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0241, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4322, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4118, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2984, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0100, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4247, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4003, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2949, device='cuda:2', grad_fn=<DivBackward0>)


567it [00:52, 10.57it/s]

tensor(0.9980, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3941, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3241, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2940, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9961, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4090, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3315, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2922, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0122, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4145, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3096, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2964, device='cuda:2', grad_fn=<DivBackward0>)


569it [00:53, 10.62it/s]

tensor(1.0024, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4251, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3060, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2927, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9586, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3961, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2184, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2823, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9570, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3943, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2192, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2819, device='cuda:2', grad_fn=<DivBackward0>)


573it [00:53, 10.73it/s]

tensor(0.9661, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4030, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2221, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2839, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9605, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3951, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2148, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2829, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0189, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4419, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3267, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2960, device='cuda:2', grad_fn=<DivBackward0>)


575it [00:53, 10.47it/s]

tensor(1.0348, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4355, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3276, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3012, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0270, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4342, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3285, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2991, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0638, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4434, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3178, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.3089, device='cuda:2', grad_fn=<DivBackward0>)


579it [00:54, 10.63it/s]

tensor(0.9694, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3928, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3562, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2857, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9734, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3917, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3717, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2870, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9727, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3937, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3831, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2866, device='cuda:2', grad_fn=<DivBackward0>)


581it [00:54, 10.67it/s]

tensor(0.9550, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3857, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3424, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2821, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9840, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4051, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2833, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2890, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9974, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4032, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2647, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2931, device='cuda:2', grad_fn=<DivBackward0>)


585it [00:54, 10.70it/s]

tensor(1.0040, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4217, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2610, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2934, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0170, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4260, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2855, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2968, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0255, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4397, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3430, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2981, device='cuda:2', grad_fn=<DivBackward0>)


587it [00:54, 10.77it/s]

tensor(1.0112, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4379, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3248, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2941, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0113, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4361, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3527, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2943, device='cuda:2', grad_fn=<DivBackward0>)


589it [00:54, 10.28it/s]

tensor(1.0124, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4326, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3674, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2949, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9583, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4043, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3556, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2815, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9631, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4163, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3603, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2819, device='cuda:2', grad_fn=<DivBackward0>)


593it [00:55, 10.56it/s]

tensor(0.9742, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4112, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3753, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2856, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9552, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4104, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3571, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2801, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9820, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4024, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2391, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2886, device='cuda:2', grad_fn=<DivBackward0>)


595it [00:55, 10.62it/s]

tensor(0.9564, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4074, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2337, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2807, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9524, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3984, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2269, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2803, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9517, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3946, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2343, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2804, device='cuda:2', grad_fn=<DivBackward0>)


599it [00:55, 10.72it/s]

tensor(0.9674, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4104, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4060, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2837, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9672, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4086, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4001, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2837, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9750, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4175, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3841, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2853, device='cuda:2', grad_fn=<DivBackward0>)


601it [00:56, 10.65it/s]

tensor(0.9684, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3950, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3893, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2852, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9797, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4077, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2686, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2875, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9794, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4029, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2540, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2878, device='cuda:2', grad_fn=<DivBackward0>)


605it [00:56, 10.69it/s]

tensor(0.9760, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4040, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2422, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2867, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9699, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3951, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2442, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2857, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9427, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3818, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4077, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2787, device='cuda:2', grad_fn=<DivBackward0>)


607it [00:56, 10.73it/s]

tensor(0.9370, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3800, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3664, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2772, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9421, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3901, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3702, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2779, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9459, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3916, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3681, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2789, device='cuda:2', grad_fn=<DivBackward0>)


611it [00:57, 10.69it/s]

tensor(0.9813, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3937, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2936, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2892, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9619, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3923, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2949, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2836, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9841, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3955, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2945, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2898, device='cuda:2', grad_fn=<DivBackward0>)


613it [00:57, 10.67it/s]

tensor(0.9608, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3859, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2955, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2838, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9557, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3746, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2834, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2832, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9635, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3777, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3151, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2853, device='cuda:2', grad_fn=<DivBackward0>)


617it [00:57, 10.70it/s]

tensor(0.9796, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3908, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2885, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2889, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9483, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3803, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2844, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2805, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9369, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3681, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2736, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2782, device='cuda:2', grad_fn=<DivBackward0>)


619it [00:57, 10.65it/s]

tensor(0.9352, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3722, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2864, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2773, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9460, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3749, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2618, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2803, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9512, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3900, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3047, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2806, device='cuda:2', grad_fn=<DivBackward0>)


623it [00:58, 10.67it/s]

tensor(0.9366, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3969, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3752, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2757, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9378, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3998, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3799, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2758, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9489, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4140, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3793, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2780, device='cuda:2', grad_fn=<DivBackward0>)


625it [00:58, 10.64it/s]

tensor(0.9401, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4035, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3835, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2762, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9668, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4170, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2862, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2829, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9551, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4045, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2773, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2805, device='cuda:2', grad_fn=<DivBackward0>)


629it [00:58, 10.68it/s]

tensor(0.9589, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4076, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2872, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2814, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9407, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3914, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2647, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2774, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9750, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4043, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4063, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2864, device='cuda:2', grad_fn=<DivBackward0>)


631it [00:58, 10.75it/s]

tensor(0.9816, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4080, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3965, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2880, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9954, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4112, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4166, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2918, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9769, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4014, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4125, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2872, device='cuda:2', grad_fn=<DivBackward0>)


635it [00:59, 10.69it/s]

tensor(0.9479, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3799, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3637, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2805, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9471, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3783, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3661, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2803, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9648, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3968, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3747, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2840, device='cuda:2', grad_fn=<DivBackward0>)


637it [00:59, 10.71it/s]

tensor(0.9741, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3890, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3666, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2874, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9455, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3800, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3447, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2797, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9933, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4059, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3715, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2916, device='cuda:2', grad_fn=<DivBackward0>)


641it [00:59, 10.63it/s]

tensor(0.9727, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3918, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3527, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2868, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9953, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3982, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3426, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2929, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9496, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3936, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3394, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2798, device='cuda:2', grad_fn=<DivBackward0>)


643it [01:00, 10.70it/s]

tensor(0.9438, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3976, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3295, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2778, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9448, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3961, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3387, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2782, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9559, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3908, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3352, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2819, device='cuda:2', grad_fn=<DivBackward0>)


647it [01:00, 10.67it/s]

tensor(0.9247, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3848, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4104, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2732, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9111, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3657, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4142, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2707, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9265, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3760, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4160, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2744, device='cuda:2', grad_fn=<DivBackward0>)


649it [01:00, 10.65it/s]

tensor(0.9068, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3702, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4053, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2691, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9137, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3690, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4051, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2712, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9258, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3726, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4120, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2745, device='cuda:2', grad_fn=<DivBackward0>)


653it [01:00, 10.68it/s]

tensor(0.9206, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3669, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4117, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2734, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9202, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3680, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4110, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2732, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9879, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4013, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3664, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2904, device='cuda:2', grad_fn=<DivBackward0>)


655it [01:01, 10.66it/s]

tensor(0.9927, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4171, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3526, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2905, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9645, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4021, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3406, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2835, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9788, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4220, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3425, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2860, device='cuda:2', grad_fn=<DivBackward0>)


659it [01:01, 10.66it/s]

tensor(0.9475, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3971, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3232, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2789, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9739, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4201, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3207, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2847, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9639, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4248, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3159, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2815, device='cuda:2', grad_fn=<DivBackward0>)


661it [01:01, 10.73it/s]

tensor(0.9538, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4028, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3214, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2803, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9475, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3968, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2680, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2789, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9472, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4002, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2458, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2786, device='cuda:2', grad_fn=<DivBackward0>)


665it [01:02, 10.59it/s]

tensor(0.9305, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3900, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2472, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2745, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9463, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3976, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2456, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2785, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9064, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3705, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4290, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2689, device='cuda:2', grad_fn=<DivBackward0>)


667it [01:02, 10.66it/s]

tensor(0.9109, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3735, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4259, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2700, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9021, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3684, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4060, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2678, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9316, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3835, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4068, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2753, device='cuda:2', grad_fn=<DivBackward0>)


671it [01:02, 10.67it/s]

tensor(0.9463, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3937, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4509, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2788, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9265, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3907, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4028, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2732, device='cuda:2', grad_fn=<DivBackward0>)


673it [01:02, 10.25it/s]

tensor(0.9593, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4023, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3977, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2819, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9465, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3895, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3930, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2792, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9563, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3842, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3586, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2826, device='cuda:2', grad_fn=<DivBackward0>)


675it [01:03, 10.38it/s]

tensor(0.9354, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3695, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3406, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2776, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9196, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3634, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3344, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2734, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9372, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3769, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3705, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2775, device='cuda:2', grad_fn=<DivBackward0>)


679it [01:03, 10.49it/s]

tensor(0.8988, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3601, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3708, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2675, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9090, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3677, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3655, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2699, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8978, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3633, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3631, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2669, device='cuda:2', grad_fn=<DivBackward0>)


681it [01:03, 10.58it/s]

tensor(0.8931, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3694, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3612, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2651, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9307, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3879, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3513, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2747, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9029, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3722, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3344, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2678, device='cuda:2', grad_fn=<DivBackward0>)


685it [01:03, 10.61it/s]

tensor(0.9151, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3869, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3195, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2702, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9147, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3847, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3025, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2703, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9403, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3957, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4076, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2769, device='cuda:2', grad_fn=<DivBackward0>)


687it [01:04, 10.63it/s]

tensor(0.9313, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3825, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3500, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2753, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9375, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3932, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3521, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2763, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9499, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3916, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3294, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2801, device='cuda:2', grad_fn=<DivBackward0>)


691it [01:04, 10.64it/s]

tensor(0.9865, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3958, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3994, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2905, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9996, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3928, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3674, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2946, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9992, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3958, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3631, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2942, device='cuda:2', grad_fn=<DivBackward0>)


693it [01:04, 10.59it/s]

tensor(0.9888, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3805, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3579, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2925, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9420, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3778, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3605, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2789, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9356, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3651, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3740, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2780, device='cuda:2', grad_fn=<DivBackward0>)


697it [01:05, 10.66it/s]

tensor(0.9753, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3856, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3608, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2881, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9463, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3815, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3545, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2798, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9613, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3871, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4187, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2838, device='cuda:2', grad_fn=<DivBackward0>)


699it [01:05, 10.65it/s]

tensor(0.9916, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3987, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4059, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2918, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9631, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4025, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4028, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2831, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9404, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3841, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3932, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2779, device='cuda:2', grad_fn=<DivBackward0>)


703it [01:05, 10.68it/s]

tensor(0.9467, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4079, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3757, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2778, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9353, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4113, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3628, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2742, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9402, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4110, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3261, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2756, device='cuda:2', grad_fn=<DivBackward0>)


705it [01:05, 10.69it/s]

tensor(0.9457, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4148, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3383, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2769, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9468, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3889, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3955, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2794, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9517, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3901, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3807, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2807, device='cuda:2', grad_fn=<DivBackward0>)


709it [01:06, 10.50it/s]

tensor(0.9419, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3923, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3809, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2777, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9406, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3807, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3735, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2782, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9767, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4105, device='cuda:2', grad_fn=<MeanBackward0>)


711it [01:06, 10.48it/s]

tensor(0.3537, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2864, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9781, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4082, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3472, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2870, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9862, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4141, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3527, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2888, device='cuda:2', grad_fn=<DivBackward0>)


713it [01:06, 10.53it/s]

tensor(1.0040, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4281, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3508, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2929, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0236, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4377, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2989, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2978, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0073, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4227, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2845, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2943, device='cuda:2', grad_fn=<DivBackward0>)


717it [01:06, 10.62it/s]

tensor(1.0257, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4345, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2794, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2986, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0089, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4263, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2784, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2945, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9136, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3540, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3487, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2724, device='cuda:2', grad_fn=<DivBackward0>)


719it [01:07, 10.62it/s]

tensor(0.9157, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3530, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3457, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2731, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9115, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3445, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3358, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2725, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8991, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3408, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3479, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2691, device='cuda:2', grad_fn=<DivBackward0>)


723it [01:07, 10.46it/s]

tensor(0.9102, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3504, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3853, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2717, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9082, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3577, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3485, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2705, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9196, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3729, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3351, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2726, device='cuda:2', grad_fn=<DivBackward0>)


725it [01:07, 10.34it/s]

tensor(0.9296, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3810, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3535, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2749, device='cuda:2', grad_fn=<DivBackward0>)
tensor(1.0094, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4225, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2883, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2949, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9959, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4182, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2803, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2914, device='cuda:2', grad_fn=<DivBackward0>)


729it [01:08, 10.41it/s]

tensor(0.9756, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4109, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2729, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2860, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9949, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4101, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2935, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2918, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9025, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3693, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4663, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2679, device='cuda:2', grad_fn=<DivBackward0>)


731it [01:08, 10.16it/s]

tensor(0.8938, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3574, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4590, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2662, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8853, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3572, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4682, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2637, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8874, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3515, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4620, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2648, device='cuda:2', grad_fn=<DivBackward0>)


735it [01:08, 10.40it/s]

tensor(0.9040, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3682, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3328, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2684, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9104, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3669, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3465, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2704, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9180, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3764, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3418, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2719, device='cuda:2', grad_fn=<DivBackward0>)


737it [01:08, 10.47it/s]

tensor(0.9008, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3633, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3277, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2678, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9217, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3765, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2621, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2730, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8891, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3580, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2650, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2648, device='cuda:2', grad_fn=<DivBackward0>)


741it [01:09, 10.53it/s]

tensor(0.9045, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3651, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2631, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2688, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9059, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3652, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2888, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2692, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8959, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3675, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3175, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2660, device='cuda:2', grad_fn=<DivBackward0>)


743it [01:09, 10.55it/s]

tensor(0.9138, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3699, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3173, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2712, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9052, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3660, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3095, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2689, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9023, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3660, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3085, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2681, device='cuda:2', grad_fn=<DivBackward0>)


747it [01:09, 10.62it/s]

tensor(0.9556, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3989, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3300, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2811, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9487, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3916, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3250, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2797, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9611, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3986, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3259, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2828, device='cuda:2', grad_fn=<DivBackward0>)


749it [01:10, 10.73it/s]

tensor(0.9336, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3849, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3214, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2758, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9176, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3674, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2823, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2725, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9148, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3696, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2877, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2715, device='cuda:2', grad_fn=<DivBackward0>)


753it [01:10, 10.50it/s]

tensor(0.9238, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3660, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2820, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2745, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9359, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3856, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2859, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2764, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8818, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3454, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3397, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2636, device='cuda:2', grad_fn=<DivBackward0>)


755it [01:10, 10.50it/s]

tensor(0.8690, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3475, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3372, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2596, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8893, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3526, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3320, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2653, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8747, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3460, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3457, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2614, device='cuda:2', grad_fn=<DivBackward0>)


759it [01:10, 10.56it/s]

tensor(0.9229, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3762, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3076, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2734, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9338, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3726, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2816, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2769, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9340, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3785, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2703, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2765, device='cuda:2', grad_fn=<DivBackward0>)


761it [01:11, 10.56it/s]

tensor(0.9533, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3833, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2682, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2818, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9094, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3732, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3245, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2696, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8936, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3716, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3188, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2650, device='cuda:2', grad_fn=<DivBackward0>)


765it [01:11, 10.47it/s]

tensor(0.9077, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3728, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3187, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2691, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8938, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3662, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3101, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2655, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8864, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3508, device='cuda:2', grad_fn=<MeanBackward0>)


767it [01:11, 10.26it/s]

tensor(0.3556, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2645, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8937, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3558, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3522, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2663, device='cuda:2', grad_fn=<DivBackward0>)


769it [01:11, 10.04it/s]

tensor(0.8950, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3496, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3507, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2672, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9265, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3540, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3341, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2762, device='cuda:2', grad_fn=<DivBackward0>)


771it [01:12,  9.92it/s]

tensor(0.8862, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3308, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3384, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2661, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8948, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3393, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3265, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2680, device='cuda:2', grad_fn=<DivBackward0>)


773it [01:12,  9.83it/s]

tensor(0.8792, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3410, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3296, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2632, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9207, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3639, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3178, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2737, device='cuda:2', grad_fn=<DivBackward0>)


775it [01:12,  9.75it/s]

tensor(0.8774, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3513, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3562, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2618, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8824, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3644, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3495, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2623, device='cuda:2', grad_fn=<DivBackward0>)


777it [01:12,  9.67it/s]

tensor(0.8698, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3566, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3652, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2591, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8649, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3596, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3640, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2574, device='cuda:2', grad_fn=<DivBackward0>)


779it [01:13,  9.59it/s]

tensor(0.9167, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3880, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3448, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2706, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9144, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3735, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3083, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2710, device='cuda:2', grad_fn=<DivBackward0>)


781it [01:13,  9.58it/s]

tensor(0.9254, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3772, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2977, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2740, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9095, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3595, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3146, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2707, device='cuda:2', grad_fn=<DivBackward0>)


783it [01:13,  9.59it/s]

tensor(0.9126, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3538, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3343, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2721, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9053, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3494, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3264, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2703, device='cuda:2', grad_fn=<DivBackward0>)


785it [01:13,  9.63it/s]

tensor(0.9208, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3587, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3224, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2742, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9199, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3575, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3332, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2740, device='cuda:2', grad_fn=<DivBackward0>)


787it [01:13,  9.65it/s]

tensor(0.8804, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3508, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4160, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2628, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8661, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3437, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4021, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2590, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8831, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3410, device='cuda:2', grad_fn=<MeanBackward0>)


789it [01:14,  9.92it/s]

tensor(0.3963, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2643, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8543, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3375, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3922, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2560, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9109, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3720, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3295, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2701, device='cuda:2', grad_fn=<DivBackward0>)


792it [01:14, 10.11it/s]

tensor(0.9112, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3696, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3215, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2704, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9083, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3692, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3281, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2696, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9186, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3746, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3175, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2722, device='cuda:2', grad_fn=<DivBackward0>)


794it [01:14,  9.96it/s]

tensor(0.9289, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3876, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2745, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2742, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9216, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3876, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2821, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2721, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9122, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3803, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2392, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2699, device='cuda:2', grad_fn=<DivBackward0>)


798it [01:14, 10.27it/s]

tensor(0.9053, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3624, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2477, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2692, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9002, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3661, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2535, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2674, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8910, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3649, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2406, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2648, device='cuda:2', grad_fn=<DivBackward0>)


800it [01:15, 10.35it/s]

tensor(0.8750, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3490, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2394, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2613, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8759, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3522, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2330, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2613, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8756, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3434, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3642, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2619, device='cuda:2', grad_fn=<DivBackward0>)


804it [01:15, 10.55it/s]

tensor(0.8744, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3301, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3679, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2626, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8740, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3372, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3658, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2619, device='cuda:2', grad_fn=<DivBackward0>)


806it [01:15, 10.26it/s]

tensor(0.8797, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3399, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3635, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2634, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9251, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3647, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3450, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2749, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9106, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3751, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3280, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2698, device='cuda:2', grad_fn=<DivBackward0>)


808it [01:15, 10.40it/s]

tensor(0.9426, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3983, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3397, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2774, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9182, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3788, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3513, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2718, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8613, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3489, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3316, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2572, device='cuda:2', grad_fn=<DivBackward0>)


812it [01:16, 10.47it/s]

tensor(0.8750, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3627, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3249, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2602, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8727, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3589, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3231, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2598, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8870, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3686, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3175, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2633, device='cuda:2', grad_fn=<DivBackward0>)


814it [01:16, 10.50it/s]

tensor(0.9081, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3687, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2549, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2696, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8980, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3608, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2783, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2672, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8965, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3485, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2579, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2677, device='cuda:2', grad_fn=<DivBackward0>)


818it [01:16, 10.52it/s]

tensor(0.8921, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3495, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2451, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2663, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9148, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3484, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3084, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2732, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8963, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3439, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2950, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2680, device='cuda:2', grad_fn=<DivBackward0>)


820it [01:17, 10.54it/s]

tensor(0.8934, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3467, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3035, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2670, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8965, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3508, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2972, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2675, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9097, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3619, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3211, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2706, device='cuda:2', grad_fn=<DivBackward0>)


824it [01:17, 10.55it/s]

tensor(0.8724, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3390, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3045, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2613, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8805, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3419, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3061, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2635, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8995, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3566, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3185, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2680, device='cuda:2', grad_fn=<DivBackward0>)


826it [01:17, 10.10it/s]

tensor(0.8918, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3531, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2468, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2660, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8837, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3497, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2503, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2638, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8815, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3593, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2544, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2624, device='cuda:2', grad_fn=<DivBackward0>)


830it [01:17, 10.56it/s]

tensor(0.8843, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3515, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2439, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2638, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8996, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3636, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2604, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2674, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9266, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3855, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2649, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2737, device='cuda:2', grad_fn=<DivBackward0>)


832it [01:18, 10.48it/s]

tensor(0.9133, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3772, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2652, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2704, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9388, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3833, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2590, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2775, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9075, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3696, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4185, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2693, device='cuda:2', grad_fn=<DivBackward0>)


836it [01:18, 10.51it/s]

tensor(0.9021, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3577, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3864, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2687, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9117, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3669, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3733, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2708, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9019, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3563, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3845, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2687, device='cuda:2', grad_fn=<DivBackward0>)


838it [01:18, 10.50it/s]

tensor(0.9326, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3711, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2905, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2767, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9322, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3763, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3039, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2761, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8980, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3594, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2881, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2673, device='cuda:2', grad_fn=<DivBackward0>)


842it [01:19, 10.34it/s]

tensor(0.9133, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3559, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2766, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2721, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9146, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3572, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3409, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2724, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9279, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3732, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3319, device='cuda:2', grad_fn=<AddBackward0>) 

844it [01:19, 10.39it/s]

tensor(0.2751, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9291, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3781, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3360, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2750, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9311, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3865, device='cuda:2', grad_fn=<MeanBackward0>)


846it [01:19, 10.07it/s]

tensor(0.3444, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2750, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8685, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3465, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2994, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2595, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8644, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3457, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3012, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2584, device='cuda:2', grad_fn=<DivBackward0>)


848it [01:19, 10.26it/s]

tensor(0.8784, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3496, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3048, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2622, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8696, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3481, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3046, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2597, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8672, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3525, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4032, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2587, device='cuda:2', grad_fn=<DivBackward0>)


852it [01:20, 10.48it/s]

tensor(0.8636, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3515, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3989, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2577, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8607, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3451, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3989, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2573, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8537, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3468, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3873, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2551, device='cuda:2', grad_fn=<DivBackward0>)


854it [01:20, 10.57it/s]

tensor(0.9092, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3612, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2739, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2705, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9006, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3589, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2464, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2681, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9088, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3601, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2506, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2705, device='cuda:2', grad_fn=<DivBackward0>)


858it [01:20, 10.61it/s]

tensor(0.9160, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3727, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2590, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2716, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9051, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3738, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4338, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2683, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9133, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3795, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4072, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2703, device='cuda:2', grad_fn=<DivBackward0>)


860it [01:20, 10.67it/s]

tensor(0.9244, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3809, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4028, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2734, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9130, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3959, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4025, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2689, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8711, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3658, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2612, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2588, device='cuda:2', grad_fn=<DivBackward0>)


864it [01:21, 10.75it/s]

tensor(0.8968, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3782, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2494, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2655, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8890, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3703, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2419, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2638, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8848, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3636, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2462, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2631, device='cuda:2', grad_fn=<DivBackward0>)


866it [01:21, 10.70it/s]

tensor(0.9003, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3749, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3096, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2668, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8952, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3591, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2838, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2665, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8722, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3509, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2959, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2603, device='cuda:2', grad_fn=<DivBackward0>)


870it [01:21, 10.57it/s]

tensor(0.8645, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3391, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2887, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2589, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8522, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3355, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3158, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2555, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8622, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3379, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3176, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2583, device='cuda:2', grad_fn=<DivBackward0>)


872it [01:21, 10.57it/s]

tensor(0.8663, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3266, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3072, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2604, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8810, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3466, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3558, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2632, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9273, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3590, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3215, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2761, device='cuda:2', grad_fn=<DivBackward0>)


876it [01:22, 10.63it/s]

tensor(0.9096, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3577, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3063, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2709, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9143, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3624, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3034, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2719, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9343, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3780, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.2928, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2766, device='cuda:2', grad_fn=<DivBackward0>)


878it [01:22, 10.65it/s]

tensor(0.9168, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3842, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4158, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2709, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9220, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3800, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3820, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2728, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9369, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.4000, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3887, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2756, device='cuda:2', grad_fn=<DivBackward0>)


882it [01:22, 10.50it/s]

tensor(0.9224, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3847, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3772, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2725, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8780, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3501, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3409, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2621, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8787, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3457, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3300, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2626, device='cuda:2', grad_fn=<DivBackward0>)


884it [01:23, 10.48it/s]

tensor(0.8963, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3574, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3323, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2670, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8914, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3541, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3351, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2658, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9415, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3785, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3473, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2787, device='cuda:2', grad_fn=<DivBackward0>)


888it [01:23, 10.46it/s]

tensor(0.9212, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3718, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3156, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2732, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9231, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3728, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3228, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2737, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9263, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3827, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3219, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2738, device='cuda:2', grad_fn=<DivBackward0>)


890it [01:23, 10.49it/s]

tensor(0.9241, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3831, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4160, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2731, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9142, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3845, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4214, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2701, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9566, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3991, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4645, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2814, device='cuda:2', grad_fn=<DivBackward0>)


894it [01:24, 10.52it/s]

tensor(0.9180, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3766, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.4140, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2719, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.9032, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3692, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3573, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2681, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8935, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3575, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3547, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2661, device='cuda:2', grad_fn=<DivBackward0>)


897it [01:24, 10.63it/s]

tensor(0.8945, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3489, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3612, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2671, device='cuda:2', grad_fn=<DivBackward0>)
tensor(0.8941, device='cuda:2', grad_fn=<MeanBackward0>) tensor(3.3529, device='cuda:2', grad_fn=<MeanBackward0>)
tensor(0.3768, device='cuda:2', grad_fn=<AddBackward0>) tensor(0.2667, device='cuda:2', grad_fn=<DivBackward0>)





KeyboardInterrupt: 

In [2]:
# # Шаг дообучения модели с использованием центроидов
# model.finetune_model_on_centroids(centroids, train_loader, model)

# # Тестируем производительность модели после дообучения
# test_perf = model.test_model(test_loader, device)
# # out_str += '\t Round 1 (B={}): {:.2f}'.format(len(cluster_centers), test_perf)

# # Выводим результаты и возвращаем производительность до и после обучения
# print(start_loss['dsc'], test_perf['dsc'])
cfg

{'unet_config': {'n_filters_init': 16, 'depth': 4, 'spatial_dims': 2, 'in_channels': 1, 'out_channels': 4, 'num_res_units': 4, 'lr': 0.001, 'patience': 5, 'clue_softmax_t': 1.0, 'batch_size': 32}, 'binary_target': False, 'lr': 0.001, 'patience': 5, 'lambda_centroids': 0.6, 'dataset': {'data_dir': '/home/mikhelson/MedImSeg-Lab24/data/MNM/', 'vendor_assignment': {'train': 'siemens', 'test': 'ge'}, 'batch_size': 32, 'binary_target': False, 'non_empty_target': False}, 'unet': {'n_filters_init': 16, 'depth': 4, 'spatial_dims': 2, 'in_channels': 1, 'out_channels': 4, 'num_res_units': 4, 'lr': 0.001, 'patience': 5, 'clue_softmax_t': 1.0, 'batch_size': 32}, 'trainer': {'train_transforms': 'global_transforms', 'limit_train_batches': 50, 'max_epochs': 100, 'early_stopping': {'monitor': 'val_loss', 'mode': 'min'}, 'model_checkpoint': {'save_top_k': 2, 'dirpath': '../../pre-trained/trained_UNets', 'monitor': 'val_loss'}, 'logging': True}}