In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import transforms
from torch import Tensor
from torchvision.utils import save_image
from tqdm import tqdm
import torchvision.datasets as datasets 
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torch.utils.data import random_split
from CustomDataset import CustomDataset
from PIL import Image
import wandb
import os

In [2]:
# !pip install wandb

In [3]:
# print(os.path.exists('/lustre/'))
# os.listdir('/lustre/')

In [4]:
!wandb login

/bin/bash: wandb: command not found


In [5]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
INPUT_DIM = 3
#38804
Z_DIM = 128
NUM_EPOCHS = 400
BATCH_SIZE = 256
LR_RATE = 2e-4
KL_COEFF_MAX = 0.0000025

LOAD_TO_RAM = False
PICKLE_LOAD = False

current_epoch = 0
NUM_WORKERS = 4

RUN_NAME = "128Z_25000sampl"
PATH = "model_wandb_128dim_25000pics.pt"

ADAPTIVE_BETA = True

cuda


In [6]:
def kl_coeff_set(epoch):
    if not ADAPTIVE_BETA:
        return KL_COEFF_MAX
    # Calculate the beta value for the current epoch
    if epoch < NUM_EPOCHS / 4:
        coeff = (KL_COEFF_MAX / (NUM_EPOCHS / 4)) * epoch
    else:
        coeff = KL_COEFF_MAX

    return coeff

In [7]:
class VanillaVAE(nn.Module):
    def __init__(self,
                    in_channels: int,
                    latent_dim: int,
                    hidden_dims: list = None,
                    **kwargs) -> None:
            super(VanillaVAE, self).__init__()

            self.latent_dim = latent_dim

            modules = []
            if hidden_dims is None:
                hidden_dims = [32, 64, 128, 256, 512]

            # Build Encoder
            for h_dim in hidden_dims:
                modules.append(
                    nn.Sequential(
                        nn.Conv2d(in_channels, out_channels=h_dim,
                                kernel_size= 3, stride= 2, padding  = 1),
                        nn.BatchNorm2d(h_dim),
                        nn.LeakyReLU())
                )
                in_channels = h_dim

            self.encoder = nn.Sequential(*modules)
            self.fc_mu = nn.Linear(hidden_dims[-1]*42, latent_dim)
            self.fc_var = nn.Linear(hidden_dims[-1]*42, latent_dim)


            # Build Decoder
            modules = []

            self.decoder_input = nn.Linear(latent_dim, hidden_dims[-1] * 42)

            hidden_dims.reverse()

            for i in range(len(hidden_dims) - 1):
                modules.append(
                    nn.Sequential(
                        nn.ConvTranspose2d(hidden_dims[i],
                                        hidden_dims[i + 1],
                                        kernel_size=3,
                                        stride = 2,
                                        padding=1,
                                        output_padding=1),
                        nn.BatchNorm2d(hidden_dims[i + 1]),
                        nn.LeakyReLU())
                )



            self.decoder = nn.Sequential(*modules)

            self.final_layer = nn.Sequential(
                                nn.ConvTranspose2d(hidden_dims[-1],
                                                hidden_dims[-1],
                                                kernel_size=3,
                                                stride=2,
                                                padding=1,
                                                output_padding=1),
                                nn.BatchNorm2d(hidden_dims[-1]),
                                nn.LeakyReLU(),
                                nn.Conv2d(hidden_dims[-1], out_channels= 3,
                                        kernel_size= 3, padding= 1),
                                nn.Tanh())

    def encode(self, input: Tensor):
        """
        Encodes the input by passing through the encoder network
        and returns the latent codes.
        :param input: (Tensor) Input tensor to encoder [N x C x H x W]
        :return: (Tensor) List of latent codes
        """
        result = self.encoder(input)
        result = torch.flatten(result, start_dim=1)

        # Split the result into mu and var components
        # of the latent Gaussian distribution
        mu = self.fc_mu(result)
        log_var = self.fc_var(result)

        return [mu, log_var]

    def decode(self, z: Tensor) -> Tensor:
        """
        Maps the given latent codes
        onto the image space.
        :param z: (Tensor) [B x D]
        :return: (Tensor) [B x C x H x W]
        """
        result = self.decoder_input(z)
        result = result.view(-1, 512, 7, 6)
        result = self.decoder(result)
        result = self.final_layer(result)
        return result

    def reparameterize(self, mu: Tensor, logvar: Tensor) -> Tensor:
        """
        Reparameterization trick to sample from N(mu, var) from
        N(0,1).
        :param mu: (Tensor) Mean of the latent Gaussian [B x D]
        :param logvar: (Tensor) Standard deviation of the latent Gaussian [B x D]
        :return: (Tensor) [B x D]
        """
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return eps * std + mu

    def forward(self, input: Tensor, **kwargs):
        mu, log_var = self.encode(input)
        z = self.reparameterize(mu, log_var)
        return  [self.decode(z), input, mu, log_var]

    def loss_function(self,
                    *args) -> dict:
        """
        Computes the VAE loss function.
        KL(N(\mu, \sigma), N(0, 1)) = \log \frac{1}{\sigma} + \frac{\sigma^2 + \mu^2}{2} - \frac{1}{2}
        :param args:
        :param kwargs:
        :return:
        """
        recons = args[0]
        input = args[1]
        mu = args[2]
        log_var = args[3]

        recons_loss =F.mse_loss(recons, input)


        kld_loss = torch.mean(-0.5 * torch.sum(1 + log_var - mu ** 2 - log_var.exp(), dim = 1), dim = 0)

        # loss = recons_loss + kld_loss
        return {'loss': (recons_loss, kld_loss), 'Reconstruction_Loss':recons_loss.detach(), 'KLD':-kld_loss.detach()}

    def sample(self,
            num_samples:int,
            current_device: int, **kwargs) -> Tensor:
        """
        Samples from the latent space and return the corresponding
        image space map.
        :param num_samples: (Int) Number of samples
        :param current_device: (Int) Device to run the model
        :return: (Tensor)
        """
        z = torch.randn(num_samples,
                        self.latent_dim)

        z = z.to(current_device)

        samples = self.decode(z)
        return samples

    def generate(self, x: Tensor, **kwargs) -> Tensor:
        """
        Given an input image x, returns the reconstructed image
        :param x: (Tensor) [B x C x H x W]
        :return: (Tensor) [B x C x H x W]
        """

        return self.forward(x)[0]

In [8]:
data_length = 25000 #202599
dataset = CustomDataset("data/img_align_celeba", [(str(i).rjust(6, '0')+".jpg") for i in range(1,data_length+1)], PICKLE_LOAD, transform=transforms.ToTensor(), loadToRam=LOAD_TO_RAM)

dataset_train, dataset_val = random_split(dataset, [int(data_length*0.8), data_length- int(data_length*0.8)])
# dataset_train, dataset_val = dataset, dataset

train_loader = DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
validation_loader = DataLoader(dataset_val, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)

In [9]:
wandb.init(
    # set the wandb project where this run will be logged
    project="machine_run",
    
    # track hyperparameters and run metadata
    config={
    "learning_rate": LR_RATE,
    "architecture": "VAE",
    "dataset": "CELEBA",
    "epochs": NUM_EPOCHS,
    },
    name=RUN_NAME
)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mivanjevtic501[0m ([33mracunarski-fakultet[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
# Define train function
best_loss = None
def train(num_epochs, model, optimizer, loss_fn):
    # Start training
    for epoch in range(num_epochs):
        print(f"Epoch {epoch+1} of {num_epochs}")
        loop = tqdm(enumerate(train_loader))
        epoch_loss = 0
        epoch_reconst_loss = 0
        epoch_kl_div = 0
        for i, x in loop:
            # Forward pass
            x = x.to(device) #.view(-1, INPUT_DIM)
            x_reconst, _, mu, sigma = model(x)

            reconst_loss, kl_div = loss_fn(x_reconst, x, mu, sigma)['loss']

            # Backprop and optimize

            kl_weight = kl_coeff_set(epoch)
            
            kl_div = kl_weight * kl_div
            
            loss = reconst_loss + kl_div
            
#             print(loss)
            
            
            #wandb.log({"examples": images}
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss = epoch_loss + loss.item()
            epoch_reconst_loss = epoch_reconst_loss + reconst_loss.item()
            epoch_kl_div = epoch_kl_div + kl_div.item()
            loop.set_postfix(loss=loss.item())

        wandb.log({"total_loss": loss,
                       "reconst_loss": reconst_loss,
                       "kl_div": kl_div})
        

        if(epoch%10 == 0):
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': epoch_loss,
                }, "data/models/" + RUN_NAME + ".pt")
        if best_loss is None or best_loss > loss:
            best_loss = loss
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': epoch_loss,
                }, "data/models/best" + RUN_NAME + ".pt")
            
            

# Initialize model, optimizer, loss
model = VanillaVAE(INPUT_DIM, Z_DIM).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=LR_RATE)
loss_fn = model.loss_function

train(NUM_EPOCHS, model, optimizer, model.loss_function)

Epoch 1 of 400


1it [00:02,  2.00s/it, loss=0.353]

tensor(0.3915, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.3526, device='cuda:0', grad_fn=<AddBackward0>)


1it [00:02,  2.00s/it, loss=0.249]

tensor(0.2493, device='cuda:0', grad_fn=<AddBackward0>)


4it [00:02,  2.13it/s, loss=0.284]

tensor(0.2837, device='cuda:0', grad_fn=<AddBackward0>)


5it [00:02,  2.38it/s, loss=0.286]

tensor(0.2863, device='cuda:0', grad_fn=<AddBackward0>)


6it [00:02,  2.64it/s, loss=0.243]

tensor(0.2430, device='cuda:0', grad_fn=<AddBackward0>)


7it [00:03,  2.82it/s, loss=0.253]

tensor(0.2533, device='cuda:0', grad_fn=<AddBackward0>)


8it [00:03,  3.00it/s, loss=0.227]

tensor(0.2270, device='cuda:0', grad_fn=<AddBackward0>)


9it [00:03,  3.09it/s, loss=0.203]

tensor(0.2034, device='cuda:0', grad_fn=<AddBackward0>)


10it [00:04,  3.24it/s, loss=0.196]

tensor(0.1958, device='cuda:0', grad_fn=<AddBackward0>)


11it [00:04,  3.31it/s, loss=0.183]

tensor(0.1828, device='cuda:0', grad_fn=<AddBackward0>)


12it [00:04,  3.42it/s, loss=0.165]

tensor(0.1645, device='cuda:0', grad_fn=<AddBackward0>)


13it [00:04,  3.45it/s, loss=0.172]

tensor(0.1721, device='cuda:0', grad_fn=<AddBackward0>)


14it [00:05,  3.52it/s, loss=0.141]

tensor(0.1407, device='cuda:0', grad_fn=<AddBackward0>)


15it [00:05,  3.46it/s, loss=0.115]

tensor(0.1147, device='cuda:0', grad_fn=<AddBackward0>)


16it [00:05,  3.53it/s, loss=0.127]

tensor(0.1272, device='cuda:0', grad_fn=<AddBackward0>)


17it [00:06,  3.52it/s, loss=0.109]

tensor(0.1087, device='cuda:0', grad_fn=<AddBackward0>)


18it [00:06,  3.57it/s, loss=0.106]

tensor(0.1065, device='cuda:0', grad_fn=<AddBackward0>)


19it [00:06,  3.54it/s, loss=0.0878]

tensor(0.0878, device='cuda:0', grad_fn=<AddBackward0>)


20it [00:06,  3.59it/s, loss=0.0912]

tensor(0.0912, device='cuda:0', grad_fn=<AddBackward0>)


21it [00:07,  3.57it/s, loss=0.0763]

tensor(0.0763, device='cuda:0', grad_fn=<AddBackward0>)


22it [00:07,  3.60it/s, loss=0.0778]

tensor(0.0778, device='cuda:0', grad_fn=<AddBackward0>)


23it [00:07,  3.51it/s, loss=0.0762]

tensor(0.0762, device='cuda:0', grad_fn=<AddBackward0>)


24it [00:07,  3.58it/s, loss=0.0771]

tensor(0.0771, device='cuda:0', grad_fn=<AddBackward0>)


25it [00:08,  3.51it/s, loss=0.0739]

tensor(0.0739, device='cuda:0', grad_fn=<AddBackward0>)


26it [00:08,  3.53it/s, loss=0.0753]

tensor(0.0753, device='cuda:0', grad_fn=<AddBackward0>)


27it [00:08,  3.49it/s, loss=0.0709]

tensor(0.0709, device='cuda:0', grad_fn=<AddBackward0>)


28it [00:09,  3.51it/s, loss=0.065] 

tensor(0.0650, device='cuda:0', grad_fn=<AddBackward0>)


29it [00:09,  3.46it/s, loss=0.0669]

tensor(0.0669, device='cuda:0', grad_fn=<AddBackward0>)


30it [00:09,  3.49it/s, loss=0.0686]

tensor(0.0686, device='cuda:0', grad_fn=<AddBackward0>)


31it [00:09,  3.50it/s, loss=0.0638]

tensor(0.0638, device='cuda:0', grad_fn=<AddBackward0>)


32it [00:10,  3.34it/s, loss=0.0619]

tensor(0.0619, device='cuda:0', grad_fn=<AddBackward0>)


33it [00:10,  3.25it/s, loss=0.0618]

tensor(0.0618, device='cuda:0', grad_fn=<AddBackward0>)


34it [00:10,  3.21it/s, loss=0.0592]

tensor(0.0592, device='cuda:0', grad_fn=<AddBackward0>)


35it [00:11,  3.26it/s, loss=0.0616]

tensor(0.0616, device='cuda:0', grad_fn=<AddBackward0>)


36it [00:11,  3.38it/s, loss=0.0595]

tensor(0.0595, device='cuda:0', grad_fn=<AddBackward0>)


37it [00:11,  3.41it/s, loss=0.0547]

tensor(0.0547, device='cuda:0', grad_fn=<AddBackward0>)


38it [00:12,  3.51it/s, loss=0.0527]

tensor(0.0527, device='cuda:0', grad_fn=<AddBackward0>)


39it [00:12,  3.34it/s, loss=0.0568]

tensor(0.0568, device='cuda:0', grad_fn=<AddBackward0>)


40it [00:12,  3.28it/s, loss=0.0604]

tensor(0.0604, device='cuda:0', grad_fn=<AddBackward0>)


41it [00:13,  3.26it/s, loss=0.059] 

tensor(0.0590, device='cuda:0', grad_fn=<AddBackward0>)


42it [00:13,  3.21it/s, loss=0.0555]

tensor(0.0555, device='cuda:0', grad_fn=<AddBackward0>)


43it [00:13,  3.11it/s, loss=0.0506]

tensor(0.0506, device='cuda:0', grad_fn=<AddBackward0>)


44it [00:14,  3.15it/s, loss=0.0501]

tensor(0.0501, device='cuda:0', grad_fn=<AddBackward0>)


45it [00:14,  3.16it/s, loss=0.0534]

tensor(0.0534, device='cuda:0', grad_fn=<AddBackward0>)


46it [00:14,  3.08it/s, loss=0.0561]

tensor(0.0561, device='cuda:0', grad_fn=<AddBackward0>)


47it [00:15,  3.06it/s, loss=0.052] 

tensor(0.0520, device='cuda:0', grad_fn=<AddBackward0>)


48it [00:15,  3.15it/s, loss=0.0497]

tensor(0.0497, device='cuda:0', grad_fn=<AddBackward0>)


49it [00:15,  3.08it/s, loss=0.0479]

tensor(0.0479, device='cuda:0', grad_fn=<AddBackward0>)


50it [00:15,  3.16it/s, loss=0.0507]

tensor(0.0507, device='cuda:0', grad_fn=<AddBackward0>)


51it [00:16,  3.11it/s, loss=0.0494]

tensor(0.0494, device='cuda:0', grad_fn=<AddBackward0>)


52it [00:16,  2.95it/s, loss=0.0485]

tensor(0.0485, device='cuda:0', grad_fn=<AddBackward0>)


53it [00:17,  2.91it/s, loss=0.0473]

tensor(0.0473, device='cuda:0', grad_fn=<AddBackward0>)


54it [00:17,  2.98it/s, loss=0.0454]

tensor(0.0454, device='cuda:0', grad_fn=<AddBackward0>)


55it [00:17,  2.92it/s, loss=0.0501]

tensor(0.0501, device='cuda:0', grad_fn=<AddBackward0>)


56it [00:17,  3.01it/s, loss=0.0487]

tensor(0.0487, device='cuda:0', grad_fn=<AddBackward0>)


57it [00:18,  3.01it/s, loss=0.0417]

tensor(0.0417, device='cuda:0', grad_fn=<AddBackward0>)


58it [00:18,  2.97it/s, loss=0.0457]

tensor(0.0457, device='cuda:0', grad_fn=<AddBackward0>)


59it [00:19,  3.00it/s, loss=0.0429]

tensor(0.0429, device='cuda:0', grad_fn=<AddBackward0>)


60it [00:19,  3.00it/s, loss=0.0421]

tensor(0.0421, device='cuda:0', grad_fn=<AddBackward0>)


61it [00:19,  2.81it/s, loss=0.0403]

tensor(0.0403, device='cuda:0', grad_fn=<AddBackward0>)


62it [00:20,  2.96it/s, loss=0.044] 

tensor(0.0440, device='cuda:0', grad_fn=<AddBackward0>)


63it [00:20,  2.95it/s, loss=0.0383]

tensor(0.0383, device='cuda:0', grad_fn=<AddBackward0>)


64it [00:20,  3.00it/s, loss=0.0405]

tensor(0.0405, device='cuda:0', grad_fn=<AddBackward0>)


65it [00:21,  3.03it/s, loss=0.041] 

tensor(0.0410, device='cuda:0', grad_fn=<AddBackward0>)


66it [00:21,  3.11it/s, loss=0.0405]

tensor(0.0405, device='cuda:0', grad_fn=<AddBackward0>)


67it [00:21,  3.05it/s, loss=0.0437]

tensor(0.0437, device='cuda:0', grad_fn=<AddBackward0>)


68it [00:21,  3.07it/s, loss=0.0384]

tensor(0.0384, device='cuda:0', grad_fn=<AddBackward0>)


69it [00:22,  3.05it/s, loss=0.0407]

tensor(0.0407, device='cuda:0', grad_fn=<AddBackward0>)


70it [00:22,  3.07it/s, loss=0.0396]

tensor(0.0396, device='cuda:0', grad_fn=<AddBackward0>)


71it [00:22,  3.11it/s, loss=0.0386]

tensor(0.0386, device='cuda:0', grad_fn=<AddBackward0>)


72it [00:23,  3.16it/s, loss=0.041] 

tensor(0.0410, device='cuda:0', grad_fn=<AddBackward0>)


73it [00:23,  3.12it/s, loss=0.0366]

tensor(0.0366, device='cuda:0', grad_fn=<AddBackward0>)


74it [00:23,  3.17it/s, loss=0.0381]

tensor(0.0381, device='cuda:0', grad_fn=<AddBackward0>)


75it [00:24,  3.07it/s, loss=0.0377]

tensor(0.0377, device='cuda:0', grad_fn=<AddBackward0>)


76it [00:24,  3.16it/s, loss=0.0361]

tensor(0.0361, device='cuda:0', grad_fn=<AddBackward0>)


77it [00:24,  3.16it/s, loss=0.0408]

tensor(0.0408, device='cuda:0', grad_fn=<AddBackward0>)


78it [00:25,  3.16it/s, loss=0.0371]

tensor(0.0371, device='cuda:0', grad_fn=<AddBackward0>)


79it [00:25,  3.12it/s, loss=0.0382]

tensor(0.0382, device='cuda:0', grad_fn=<AddBackward0>)


80it [00:25,  3.20it/s, loss=0.0365]

tensor(0.0365, device='cuda:0', grad_fn=<AddBackward0>)


81it [00:26,  3.16it/s, loss=0.0391]

tensor(0.0391, device='cuda:0', grad_fn=<AddBackward0>)


82it [00:26,  3.20it/s, loss=0.037] 

tensor(0.0370, device='cuda:0', grad_fn=<AddBackward0>)


83it [00:26,  3.20it/s, loss=0.0382]

tensor(0.0382, device='cuda:0', grad_fn=<AddBackward0>)


84it [00:27,  3.27it/s, loss=0.0361]

tensor(0.0361, device='cuda:0', grad_fn=<AddBackward0>)


85it [00:27,  3.14it/s, loss=0.0386]

tensor(0.0386, device='cuda:0', grad_fn=<AddBackward0>)


86it [00:27,  3.16it/s, loss=0.0365]

tensor(0.0365, device='cuda:0', grad_fn=<AddBackward0>)


87it [00:28,  3.12it/s, loss=0.0369]

tensor(0.0369, device='cuda:0', grad_fn=<AddBackward0>)


88it [00:28,  3.17it/s, loss=0.0369]

tensor(0.0369, device='cuda:0', grad_fn=<AddBackward0>)


89it [00:28,  3.15it/s, loss=0.0351]

tensor(0.0351, device='cuda:0', grad_fn=<AddBackward0>)


90it [00:28,  3.14it/s, loss=0.0363]

tensor(0.0363, device='cuda:0', grad_fn=<AddBackward0>)


91it [00:29,  3.03it/s, loss=0.0329]

tensor(0.0329, device='cuda:0', grad_fn=<AddBackward0>)


92it [00:29,  3.11it/s, loss=0.0361]

tensor(0.0361, device='cuda:0', grad_fn=<AddBackward0>)


93it [00:29,  3.00it/s, loss=0.0338]

tensor(0.0338, device='cuda:0', grad_fn=<AddBackward0>)


94it [00:30,  3.02it/s, loss=0.032] 

tensor(0.0320, device='cuda:0', grad_fn=<AddBackward0>)


95it [00:30,  3.04it/s, loss=0.0329]

tensor(0.0329, device='cuda:0', grad_fn=<AddBackward0>)


96it [00:30,  3.06it/s, loss=0.0346]

tensor(0.0346, device='cuda:0', grad_fn=<AddBackward0>)


97it [00:31,  3.07it/s, loss=0.0309]

tensor(0.0309, device='cuda:0', grad_fn=<AddBackward0>)


98it [00:31,  3.07it/s, loss=0.0344]

tensor(0.0344, device='cuda:0', grad_fn=<AddBackward0>)


99it [00:31,  3.05it/s, loss=0.0336]

tensor(0.0336, device='cuda:0', grad_fn=<AddBackward0>)


100it [00:32,  3.07it/s, loss=0.032]

tensor(0.0320, device='cuda:0', grad_fn=<AddBackward0>)


101it [00:32,  2.96it/s, loss=0.0361]

tensor(0.0361, device='cuda:0', grad_fn=<AddBackward0>)


102it [00:32,  3.06it/s, loss=0.0311]

tensor(0.0311, device='cuda:0', grad_fn=<AddBackward0>)


103it [00:33,  3.06it/s, loss=0.0292]

tensor(0.0292, device='cuda:0', grad_fn=<AddBackward0>)


104it [00:33,  3.12it/s, loss=0.0311]

tensor(0.0311, device='cuda:0', grad_fn=<AddBackward0>)


105it [00:33,  3.09it/s, loss=0.0305]

tensor(0.0305, device='cuda:0', grad_fn=<AddBackward0>)


106it [00:34,  3.12it/s, loss=0.0305]

tensor(0.0305, device='cuda:0', grad_fn=<AddBackward0>)


107it [00:34,  3.12it/s, loss=0.0333]

tensor(0.0333, device='cuda:0', grad_fn=<AddBackward0>)


108it [00:34,  3.18it/s, loss=0.0306]

tensor(0.0306, device='cuda:0', grad_fn=<AddBackward0>)


109it [00:35,  3.15it/s, loss=0.0314]

tensor(0.0314, device='cuda:0', grad_fn=<AddBackward0>)


110it [00:35,  3.21it/s, loss=0.0288]

tensor(0.0288, device='cuda:0', grad_fn=<AddBackward0>)


111it [00:35,  3.09it/s, loss=0.0297]

tensor(0.0297, device='cuda:0', grad_fn=<AddBackward0>)


112it [00:36,  3.20it/s, loss=0.0316]

tensor(0.0316, device='cuda:0', grad_fn=<AddBackward0>)


113it [00:36,  3.15it/s, loss=0.0318]

tensor(0.0318, device='cuda:0', grad_fn=<AddBackward0>)


114it [00:36,  3.14it/s, loss=0.0303]

tensor(0.0303, device='cuda:0', grad_fn=<AddBackward0>)


115it [00:37,  3.17it/s, loss=0.028] 

tensor(0.0280, device='cuda:0', grad_fn=<AddBackward0>)


116it [00:37,  3.18it/s, loss=0.0301]

tensor(0.0301, device='cuda:0', grad_fn=<AddBackward0>)


117it [00:37,  3.09it/s, loss=0.031] 

tensor(0.0310, device='cuda:0', grad_fn=<AddBackward0>)


118it [00:37,  3.16it/s, loss=0.0279]

tensor(0.0279, device='cuda:0', grad_fn=<AddBackward0>)


119it [00:38,  3.15it/s, loss=0.0294]

tensor(0.0294, device='cuda:0', grad_fn=<AddBackward0>)


120it [00:38,  3.18it/s, loss=0.0273]

tensor(0.0273, device='cuda:0', grad_fn=<AddBackward0>)


121it [00:38,  3.14it/s, loss=0.0269]

tensor(0.0269, device='cuda:0', grad_fn=<AddBackward0>)


122it [00:39,  3.15it/s, loss=0.0299]

tensor(0.0299, device='cuda:0', grad_fn=<AddBackward0>)


123it [00:39,  3.16it/s, loss=0.0273]

tensor(0.0273, device='cuda:0', grad_fn=<AddBackward0>)


124it [00:39,  3.12it/s, loss=0.0272]

tensor(0.0272, device='cuda:0', grad_fn=<AddBackward0>)


125it [00:40,  3.13it/s, loss=0.027] 

tensor(0.0270, device='cuda:0', grad_fn=<AddBackward0>)


126it [00:40,  3.12it/s, loss=0.0303]

tensor(0.0303, device='cuda:0', grad_fn=<AddBackward0>)


127it [00:40,  3.08it/s, loss=0.0277]

tensor(0.0277, device='cuda:0', grad_fn=<AddBackward0>)


128it [00:41,  3.11it/s, loss=0.0289]

tensor(0.0289, device='cuda:0', grad_fn=<AddBackward0>)


129it [00:41,  3.06it/s, loss=0.0245]

tensor(0.0245, device='cuda:0', grad_fn=<AddBackward0>)


130it [00:41,  3.10it/s, loss=0.0288]

tensor(0.0288, device='cuda:0', grad_fn=<AddBackward0>)


131it [00:42,  3.07it/s, loss=0.0282]

tensor(0.0282, device='cuda:0', grad_fn=<AddBackward0>)


132it [00:42,  3.11it/s, loss=0.0269]

tensor(0.0269, device='cuda:0', grad_fn=<AddBackward0>)


133it [00:42,  3.05it/s, loss=0.0275]

tensor(0.0275, device='cuda:0', grad_fn=<AddBackward0>)


134it [00:43,  3.09it/s, loss=0.0275]

tensor(0.0275, device='cuda:0', grad_fn=<AddBackward0>)


135it [00:43,  3.04it/s, loss=0.0276]

tensor(0.0276, device='cuda:0', grad_fn=<AddBackward0>)


136it [00:43,  3.08it/s, loss=0.0287]

tensor(0.0287, device='cuda:0', grad_fn=<AddBackward0>)


137it [00:44,  3.11it/s, loss=0.0293]

tensor(0.0293, device='cuda:0', grad_fn=<AddBackward0>)


138it [00:44,  3.17it/s, loss=0.0284]

tensor(0.0284, device='cuda:0', grad_fn=<AddBackward0>)


139it [00:44,  3.09it/s, loss=0.0266]

tensor(0.0266, device='cuda:0', grad_fn=<AddBackward0>)


140it [00:45,  3.17it/s, loss=0.026] 

tensor(0.0260, device='cuda:0', grad_fn=<AddBackward0>)


141it [00:45,  3.10it/s, loss=0.0256]

tensor(0.0256, device='cuda:0', grad_fn=<AddBackward0>)


142it [00:45,  3.12it/s, loss=0.0263]

tensor(0.0263, device='cuda:0', grad_fn=<AddBackward0>)


143it [00:46,  3.11it/s, loss=0.0262]

tensor(0.0262, device='cuda:0', grad_fn=<AddBackward0>)


144it [00:46,  3.15it/s, loss=0.0272]

tensor(0.0272, device='cuda:0', grad_fn=<AddBackward0>)


145it [00:46,  3.06it/s, loss=0.0258]

tensor(0.0258, device='cuda:0', grad_fn=<AddBackward0>)


146it [00:46,  3.15it/s, loss=0.0253]

tensor(0.0253, device='cuda:0', grad_fn=<AddBackward0>)


147it [00:47,  3.07it/s, loss=0.0253]

tensor(0.0253, device='cuda:0', grad_fn=<AddBackward0>)


148it [00:47,  3.09it/s, loss=0.0256]

tensor(0.0256, device='cuda:0', grad_fn=<AddBackward0>)


149it [00:47,  3.04it/s, loss=0.0267]

tensor(0.0267, device='cuda:0', grad_fn=<AddBackward0>)


150it [00:48,  3.02it/s, loss=0.0257]

tensor(0.0257, device='cuda:0', grad_fn=<AddBackward0>)


151it [00:48,  3.00it/s, loss=0.0245]

tensor(0.0245, device='cuda:0', grad_fn=<AddBackward0>)


152it [00:49,  2.99it/s, loss=0.0273]

tensor(0.0273, device='cuda:0', grad_fn=<AddBackward0>)


153it [00:49,  3.01it/s, loss=0.0266]

tensor(0.0266, device='cuda:0', grad_fn=<AddBackward0>)


154it [00:49,  3.07it/s, loss=0.0246]

tensor(0.0246, device='cuda:0', grad_fn=<AddBackward0>)


155it [00:49,  3.05it/s, loss=0.0238]

tensor(0.0238, device='cuda:0', grad_fn=<AddBackward0>)


156it [00:50,  3.08it/s, loss=0.0263]

tensor(0.0263, device='cuda:0', grad_fn=<AddBackward0>)


157it [00:50,  3.02it/s, loss=0.0248]

tensor(0.0248, device='cuda:0', grad_fn=<AddBackward0>)


158it [00:50,  3.06it/s, loss=0.0252]

tensor(0.0252, device='cuda:0', grad_fn=<AddBackward0>)


159it [00:51,  3.04it/s, loss=0.0275]

tensor(0.0275, device='cuda:0', grad_fn=<AddBackward0>)


160it [00:51,  3.05it/s, loss=0.0239]

tensor(0.0239, device='cuda:0', grad_fn=<AddBackward0>)


161it [00:51,  3.05it/s, loss=0.0234]

tensor(0.0234, device='cuda:0', grad_fn=<AddBackward0>)


162it [00:52,  3.10it/s, loss=0.0239]

tensor(0.0239, device='cuda:0', grad_fn=<AddBackward0>)


163it [00:52,  3.15it/s, loss=0.0238]

tensor(0.0238, device='cuda:0', grad_fn=<AddBackward0>)


164it [00:52,  3.15it/s, loss=0.0257]

tensor(0.0257, device='cuda:0', grad_fn=<AddBackward0>)


165it [00:53,  3.15it/s, loss=0.0248]

tensor(0.0248, device='cuda:0', grad_fn=<AddBackward0>)


166it [00:53,  3.08it/s, loss=0.0254]

tensor(0.0254, device='cuda:0', grad_fn=<AddBackward0>)


167it [00:53,  3.09it/s, loss=0.0237]

tensor(0.0237, device='cuda:0', grad_fn=<AddBackward0>)


168it [00:54,  3.16it/s, loss=0.0264]

tensor(0.0264, device='cuda:0', grad_fn=<AddBackward0>)


169it [00:54,  3.10it/s, loss=0.022] 

tensor(0.0220, device='cuda:0', grad_fn=<AddBackward0>)


170it [00:54,  3.01it/s, loss=0.0249]

tensor(0.0249, device='cuda:0', grad_fn=<AddBackward0>)


171it [00:55,  2.99it/s, loss=0.0243]

tensor(0.0243, device='cuda:0', grad_fn=<AddBackward0>)


172it [00:55,  3.06it/s, loss=0.0257]

tensor(0.0257, device='cuda:0', grad_fn=<AddBackward0>)


173it [00:55,  3.01it/s, loss=0.0242]

tensor(0.0242, device='cuda:0', grad_fn=<AddBackward0>)


174it [00:56,  3.04it/s, loss=0.0238]

tensor(0.0238, device='cuda:0', grad_fn=<AddBackward0>)


175it [00:56,  3.04it/s, loss=0.0239]

tensor(0.0239, device='cuda:0', grad_fn=<AddBackward0>)


176it [00:56,  3.08it/s, loss=0.0228]

tensor(0.0228, device='cuda:0', grad_fn=<AddBackward0>)


177it [00:57,  3.07it/s, loss=0.0243]

tensor(0.0243, device='cuda:0', grad_fn=<AddBackward0>)


178it [00:57,  3.13it/s, loss=0.0222]

tensor(0.0222, device='cuda:0', grad_fn=<AddBackward0>)


179it [00:57,  3.04it/s, loss=0.0268]

tensor(0.0268, device='cuda:0', grad_fn=<AddBackward0>)


180it [00:58,  3.10it/s, loss=0.0228]

tensor(0.0228, device='cuda:0', grad_fn=<AddBackward0>)


181it [00:58,  3.05it/s, loss=0.0251]

tensor(0.0251, device='cuda:0', grad_fn=<AddBackward0>)


182it [00:58,  3.08it/s, loss=0.0223]

tensor(0.0223, device='cuda:0', grad_fn=<AddBackward0>)


183it [00:59,  3.06it/s, loss=0.0256]

tensor(0.0256, device='cuda:0', grad_fn=<AddBackward0>)


184it [00:59,  3.08it/s, loss=0.0238]

tensor(0.0238, device='cuda:0', grad_fn=<AddBackward0>)


185it [00:59,  2.96it/s, loss=0.0206]

tensor(0.0206, device='cuda:0', grad_fn=<AddBackward0>)


186it [01:00,  3.11it/s, loss=0.024] 

tensor(0.0240, device='cuda:0', grad_fn=<AddBackward0>)


187it [01:00,  3.07it/s, loss=0.0234]

tensor(0.0234, device='cuda:0', grad_fn=<AddBackward0>)


188it [01:00,  2.98it/s, loss=0.0233]

tensor(0.0233, device='cuda:0', grad_fn=<AddBackward0>)


189it [01:01,  3.03it/s, loss=0.0251]

tensor(0.0251, device='cuda:0', grad_fn=<AddBackward0>)


190it [01:01,  3.06it/s, loss=0.0242]

tensor(0.0242, device='cuda:0', grad_fn=<AddBackward0>)


191it [01:01,  2.97it/s, loss=0.0241]

tensor(0.0241, device='cuda:0', grad_fn=<AddBackward0>)


192it [01:02,  3.05it/s, loss=0.0217]

tensor(0.0217, device='cuda:0', grad_fn=<AddBackward0>)


193it [01:02,  3.00it/s, loss=0.0254]

tensor(0.0254, device='cuda:0', grad_fn=<AddBackward0>)


194it [01:02,  3.00it/s, loss=0.0218]

tensor(0.0218, device='cuda:0', grad_fn=<AddBackward0>)


195it [01:03,  3.09it/s, loss=0.0242]

tensor(0.0242, device='cuda:0', grad_fn=<AddBackward0>)


196it [01:03,  3.18it/s, loss=0.0234]

tensor(0.0234, device='cuda:0', grad_fn=<AddBackward0>)


197it [01:03,  3.08it/s, loss=0.0237]

tensor(0.0237, device='cuda:0', grad_fn=<AddBackward0>)


198it [01:04,  3.10it/s, loss=0.0241]

tensor(0.0241, device='cuda:0', grad_fn=<AddBackward0>)


199it [01:04,  3.06it/s, loss=0.0241]

tensor(0.0241, device='cuda:0', grad_fn=<AddBackward0>)


200it [01:04,  3.07it/s, loss=0.0226]

tensor(0.0226, device='cuda:0', grad_fn=<AddBackward0>)


201it [01:04,  3.07it/s, loss=0.0239]

tensor(0.0239, device='cuda:0', grad_fn=<AddBackward0>)


202it [01:05,  3.08it/s, loss=0.0242]

tensor(0.0242, device='cuda:0', grad_fn=<AddBackward0>)


203it [01:05,  2.99it/s, loss=0.0243]

tensor(0.0243, device='cuda:0', grad_fn=<AddBackward0>)


204it [01:05,  3.03it/s, loss=0.0217]

tensor(0.0217, device='cuda:0', grad_fn=<AddBackward0>)


205it [01:06,  3.01it/s, loss=0.0233]

tensor(0.0233, device='cuda:0', grad_fn=<AddBackward0>)


206it [01:06,  3.04it/s, loss=0.023] 

tensor(0.0230, device='cuda:0', grad_fn=<AddBackward0>)


207it [01:06,  3.04it/s, loss=0.0234]

tensor(0.0234, device='cuda:0', grad_fn=<AddBackward0>)


208it [01:07,  3.10it/s, loss=0.0245]

tensor(0.0245, device='cuda:0', grad_fn=<AddBackward0>)


209it [01:07,  3.10it/s, loss=0.0221]

tensor(0.0221, device='cuda:0', grad_fn=<AddBackward0>)


210it [01:07,  3.08it/s, loss=0.0192]

tensor(0.0192, device='cuda:0', grad_fn=<AddBackward0>)


211it [01:08,  3.03it/s, loss=0.0226]

tensor(0.0226, device='cuda:0', grad_fn=<AddBackward0>)


212it [01:08,  3.01it/s, loss=0.0226]

tensor(0.0226, device='cuda:0', grad_fn=<AddBackward0>)


213it [01:08,  3.00it/s, loss=0.019] 

tensor(0.0190, device='cuda:0', grad_fn=<AddBackward0>)


214it [01:09,  3.01it/s, loss=0.0224]

tensor(0.0224, device='cuda:0', grad_fn=<AddBackward0>)


215it [01:09,  2.88it/s, loss=0.0212]

tensor(0.0212, device='cuda:0', grad_fn=<AddBackward0>)


216it [01:10,  2.86it/s, loss=0.0218]

tensor(0.0218, device='cuda:0', grad_fn=<AddBackward0>)


217it [01:10,  2.90it/s, loss=0.0217]

tensor(0.0217, device='cuda:0', grad_fn=<AddBackward0>)


218it [01:10,  2.98it/s, loss=0.0219]

tensor(0.0219, device='cuda:0', grad_fn=<AddBackward0>)


219it [01:10,  3.00it/s, loss=0.0226]

tensor(0.0226, device='cuda:0', grad_fn=<AddBackward0>)


220it [01:11,  3.09it/s, loss=0.0213]

tensor(0.0213, device='cuda:0', grad_fn=<AddBackward0>)


221it [01:11,  2.85it/s, loss=0.0214]

tensor(0.0214, device='cuda:0', grad_fn=<AddBackward0>)


222it [01:12,  2.95it/s, loss=0.0216]

tensor(0.0216, device='cuda:0', grad_fn=<AddBackward0>)


223it [01:12,  2.98it/s, loss=0.0216]

tensor(0.0216, device='cuda:0', grad_fn=<AddBackward0>)


224it [01:12,  2.97it/s, loss=0.0226]

tensor(0.0226, device='cuda:0', grad_fn=<AddBackward0>)


225it [01:13,  3.00it/s, loss=0.0234]

tensor(0.0234, device='cuda:0', grad_fn=<AddBackward0>)


226it [01:13,  3.05it/s, loss=0.023] 

tensor(0.0230, device='cuda:0', grad_fn=<AddBackward0>)


227it [01:13,  3.03it/s, loss=0.0227]

tensor(0.0227, device='cuda:0', grad_fn=<AddBackward0>)


228it [01:13,  3.11it/s, loss=0.0213]

tensor(0.0213, device='cuda:0', grad_fn=<AddBackward0>)


229it [01:14,  3.10it/s, loss=0.0216]

tensor(0.0216, device='cuda:0', grad_fn=<AddBackward0>)


230it [01:14,  3.13it/s, loss=0.0215]

tensor(0.0215, device='cuda:0', grad_fn=<AddBackward0>)


231it [01:14,  3.12it/s, loss=0.021] 

tensor(0.0210, device='cuda:0', grad_fn=<AddBackward0>)


232it [01:15,  3.15it/s, loss=0.0201]

tensor(0.0201, device='cuda:0', grad_fn=<AddBackward0>)


233it [01:15,  3.12it/s, loss=0.0195]

tensor(0.0195, device='cuda:0', grad_fn=<AddBackward0>)


234it [01:15,  3.12it/s, loss=0.0219]

tensor(0.0219, device='cuda:0', grad_fn=<AddBackward0>)


235it [01:16,  3.08it/s, loss=0.0204]

tensor(0.0204, device='cuda:0', grad_fn=<AddBackward0>)


236it [01:16,  2.95it/s, loss=0.0217]

tensor(0.0217, device='cuda:0', grad_fn=<AddBackward0>)


237it [01:16,  2.96it/s, loss=0.0213]

tensor(0.0213, device='cuda:0', grad_fn=<AddBackward0>)


238it [01:17,  3.00it/s, loss=0.0224]

tensor(0.0224, device='cuda:0', grad_fn=<AddBackward0>)


239it [01:17,  2.97it/s, loss=0.0206]

tensor(0.0206, device='cuda:0', grad_fn=<AddBackward0>)


240it [01:17,  3.03it/s, loss=0.0197]

tensor(0.0197, device='cuda:0', grad_fn=<AddBackward0>)


241it [01:18,  3.03it/s, loss=0.0204]

tensor(0.0204, device='cuda:0', grad_fn=<AddBackward0>)


242it [01:18,  3.05it/s, loss=0.0216]

tensor(0.0216, device='cuda:0', grad_fn=<AddBackward0>)


243it [01:18,  2.94it/s, loss=0.021] 

tensor(0.0210, device='cuda:0', grad_fn=<AddBackward0>)


244it [01:19,  2.97it/s, loss=0.0186]

tensor(0.0186, device='cuda:0', grad_fn=<AddBackward0>)


245it [01:19,  2.95it/s, loss=0.0199]

tensor(0.0199, device='cuda:0', grad_fn=<AddBackward0>)


246it [01:19,  2.98it/s, loss=0.02]  

tensor(0.0200, device='cuda:0', grad_fn=<AddBackward0>)


247it [01:20,  3.01it/s, loss=0.0196]

tensor(0.0196, device='cuda:0', grad_fn=<AddBackward0>)


248it [01:20,  2.94it/s, loss=0.0197]

tensor(0.0197, device='cuda:0', grad_fn=<AddBackward0>)


249it [01:20,  2.94it/s, loss=0.0205]

tensor(0.0205, device='cuda:0', grad_fn=<AddBackward0>)


250it [01:21,  3.03it/s, loss=0.0201]

tensor(0.0201, device='cuda:0', grad_fn=<AddBackward0>)


251it [01:21,  2.97it/s, loss=0.0217]

tensor(0.0217, device='cuda:0', grad_fn=<AddBackward0>)


252it [01:21,  3.07it/s, loss=0.0209]

tensor(0.0209, device='cuda:0', grad_fn=<AddBackward0>)


253it [01:22,  3.04it/s, loss=0.0217]

tensor(0.0217, device='cuda:0', grad_fn=<AddBackward0>)


254it [01:22,  3.11it/s, loss=0.0189]

tensor(0.0189, device='cuda:0', grad_fn=<AddBackward0>)


255it [01:22,  3.05it/s, loss=0.0221]

tensor(0.0221, device='cuda:0', grad_fn=<AddBackward0>)


256it [01:23,  3.14it/s, loss=0.0193]

tensor(0.0193, device='cuda:0', grad_fn=<AddBackward0>)


257it [01:23,  3.13it/s, loss=0.0218]

tensor(0.0218, device='cuda:0', grad_fn=<AddBackward0>)


258it [01:23,  3.13it/s, loss=0.0191]

tensor(0.0191, device='cuda:0', grad_fn=<AddBackward0>)


259it [01:24,  3.14it/s, loss=0.0192]

tensor(0.0192, device='cuda:0', grad_fn=<AddBackward0>)


260it [01:24,  3.13it/s, loss=0.0215]

tensor(0.0215, device='cuda:0', grad_fn=<AddBackward0>)


261it [01:24,  3.04it/s, loss=0.0206]

tensor(0.0206, device='cuda:0', grad_fn=<AddBackward0>)


262it [01:25,  3.16it/s, loss=0.0187]

tensor(0.0187, device='cuda:0', grad_fn=<AddBackward0>)


263it [01:25,  3.13it/s, loss=0.0202]

tensor(0.0202, device='cuda:0', grad_fn=<AddBackward0>)


264it [01:25,  3.12it/s, loss=0.0197]

tensor(0.0197, device='cuda:0', grad_fn=<AddBackward0>)


265it [01:26,  3.06it/s, loss=0.0204]

tensor(0.0204, device='cuda:0', grad_fn=<AddBackward0>)


266it [01:26,  3.05it/s, loss=0.0204]

tensor(0.0204, device='cuda:0', grad_fn=<AddBackward0>)


267it [01:26,  2.99it/s, loss=0.018] 

tensor(0.0180, device='cuda:0', grad_fn=<AddBackward0>)


268it [01:27,  3.05it/s, loss=0.0204]

tensor(0.0204, device='cuda:0', grad_fn=<AddBackward0>)


269it [01:27,  3.02it/s, loss=0.0187]

tensor(0.0187, device='cuda:0', grad_fn=<AddBackward0>)


270it [01:27,  3.04it/s, loss=0.0194]

tensor(0.0194, device='cuda:0', grad_fn=<AddBackward0>)


271it [01:28,  2.99it/s, loss=0.0213]

tensor(0.0213, device='cuda:0', grad_fn=<AddBackward0>)


272it [01:28,  2.76it/s, loss=0.0173]

tensor(0.0173, device='cuda:0', grad_fn=<AddBackward0>)


273it [01:28,  2.65it/s, loss=0.0176]

tensor(0.0176, device='cuda:0', grad_fn=<AddBackward0>)


274it [01:29,  2.83it/s, loss=0.0191]

tensor(0.0191, device='cuda:0', grad_fn=<AddBackward0>)


275it [01:29,  2.86it/s, loss=0.0203]

tensor(0.0203, device='cuda:0', grad_fn=<AddBackward0>)


276it [01:29,  2.98it/s, loss=0.0202]

tensor(0.0202, device='cuda:0', grad_fn=<AddBackward0>)


277it [01:30,  2.94it/s, loss=0.0192]

tensor(0.0192, device='cuda:0', grad_fn=<AddBackward0>)


278it [01:30,  3.07it/s, loss=0.019] 

tensor(0.0190, device='cuda:0', grad_fn=<AddBackward0>)


279it [01:30,  3.07it/s, loss=0.0185]

tensor(0.0185, device='cuda:0', grad_fn=<AddBackward0>)


280it [01:31,  3.11it/s, loss=0.0201]

tensor(0.0201, device='cuda:0', grad_fn=<AddBackward0>)


281it [01:31,  3.11it/s, loss=0.0175]

tensor(0.0175, device='cuda:0', grad_fn=<AddBackward0>)


282it [01:31,  3.09it/s, loss=0.0184]

tensor(0.0184, device='cuda:0', grad_fn=<AddBackward0>)


283it [01:32,  3.10it/s, loss=0.0196]

tensor(0.0196, device='cuda:0', grad_fn=<AddBackward0>)


284it [01:32,  3.02it/s, loss=0.0183]

tensor(0.0183, device='cuda:0', grad_fn=<AddBackward0>)


285it [01:32,  2.97it/s, loss=0.0195]

tensor(0.0195, device='cuda:0', grad_fn=<AddBackward0>)


286it [01:33,  3.04it/s, loss=0.0185]

tensor(0.0185, device='cuda:0', grad_fn=<AddBackward0>)


287it [01:33,  3.08it/s, loss=0.0207]

tensor(0.0207, device='cuda:0', grad_fn=<AddBackward0>)


288it [01:33,  3.09it/s, loss=0.02]  

tensor(0.0200, device='cuda:0', grad_fn=<AddBackward0>)


289it [01:34,  3.11it/s, loss=0.0207]

tensor(0.0207, device='cuda:0', grad_fn=<AddBackward0>)


290it [01:34,  3.20it/s, loss=0.0194]

tensor(0.0194, device='cuda:0', grad_fn=<AddBackward0>)


291it [01:34,  3.19it/s, loss=0.0193]

tensor(0.0193, device='cuda:0', grad_fn=<AddBackward0>)


292it [01:35,  3.18it/s, loss=0.0179]

tensor(0.0179, device='cuda:0', grad_fn=<AddBackward0>)


293it [01:35,  3.24it/s, loss=0.0204]

tensor(0.0204, device='cuda:0', grad_fn=<AddBackward0>)


294it [01:35,  3.17it/s, loss=0.0193]

tensor(0.0193, device='cuda:0', grad_fn=<AddBackward0>)


295it [01:35,  3.08it/s, loss=0.0198]

tensor(0.0198, device='cuda:0', grad_fn=<AddBackward0>)


296it [01:36,  3.15it/s, loss=0.0211]

tensor(0.0211, device='cuda:0', grad_fn=<AddBackward0>)


297it [01:36,  3.00it/s, loss=0.0179]

tensor(0.0179, device='cuda:0', grad_fn=<AddBackward0>)


298it [01:36,  3.04it/s, loss=0.0177]

tensor(0.0177, device='cuda:0', grad_fn=<AddBackward0>)


299it [01:37,  2.98it/s, loss=0.0181]

tensor(0.0181, device='cuda:0', grad_fn=<AddBackward0>)


300it [01:37,  3.07it/s, loss=0.0202]

tensor(0.0202, device='cuda:0', grad_fn=<AddBackward0>)


301it [01:37,  3.05it/s, loss=0.0171]

tensor(0.0171, device='cuda:0', grad_fn=<AddBackward0>)


302it [01:38,  3.13it/s, loss=0.0194]

tensor(0.0194, device='cuda:0', grad_fn=<AddBackward0>)


303it [01:38,  3.14it/s, loss=0.0203]

tensor(0.0203, device='cuda:0', grad_fn=<AddBackward0>)


304it [01:38,  3.18it/s, loss=0.0173]

tensor(0.0173, device='cuda:0', grad_fn=<AddBackward0>)


305it [01:39,  3.12it/s, loss=0.0173]

tensor(0.0173, device='cuda:0', grad_fn=<AddBackward0>)


306it [01:39,  3.22it/s, loss=0.0184]

tensor(0.0184, device='cuda:0', grad_fn=<AddBackward0>)


307it [01:39,  3.15it/s, loss=0.0196]

tensor(0.0196, device='cuda:0', grad_fn=<AddBackward0>)


308it [01:40,  3.15it/s, loss=0.0174]

tensor(0.0174, device='cuda:0', grad_fn=<AddBackward0>)


309it [01:40,  3.11it/s, loss=0.0166]

tensor(0.0166, device='cuda:0', grad_fn=<AddBackward0>)


310it [01:40,  3.06it/s, loss=0.0206]

tensor(0.0206, device='cuda:0', grad_fn=<AddBackward0>)


311it [01:41,  3.13it/s, loss=0.0194]

tensor(0.0194, device='cuda:0', grad_fn=<AddBackward0>)


312it [01:41,  3.14it/s, loss=0.0181]

tensor(0.0181, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.0176, device='cuda:0', grad_fn=<AddBackward0>)


313it [01:41,  3.08it/s, loss=0.0176]


Epoch 2 of 400


1it [00:00,  2.47it/s, loss=0.0187]

tensor(0.0187, device='cuda:0', grad_fn=<AddBackward0>)


2it [00:00,  2.99it/s, loss=0.0199]

tensor(0.0199, device='cuda:0', grad_fn=<AddBackward0>)


3it [00:00,  3.22it/s, loss=0.0187]

tensor(0.0187, device='cuda:0', grad_fn=<AddBackward0>)


4it [00:01,  3.30it/s, loss=0.0192]

tensor(0.0192, device='cuda:0', grad_fn=<AddBackward0>)


5it [00:01,  3.23it/s, loss=0.0182]

tensor(0.0182, device='cuda:0', grad_fn=<AddBackward0>)


6it [00:01,  3.25it/s, loss=0.0188]

tensor(0.0188, device='cuda:0', grad_fn=<AddBackward0>)


7it [00:02,  3.27it/s, loss=0.0189]

tensor(0.0189, device='cuda:0', grad_fn=<AddBackward0>)


8it [00:02,  3.28it/s, loss=0.0188]

tensor(0.0188, device='cuda:0', grad_fn=<AddBackward0>)


9it [00:02,  3.20it/s, loss=0.0166]

tensor(0.0166, device='cuda:0', grad_fn=<AddBackward0>)


10it [00:03,  3.30it/s, loss=0.019]

tensor(0.0190, device='cuda:0', grad_fn=<AddBackward0>)


11it [00:03,  3.27it/s, loss=0.0207]

tensor(0.0207, device='cuda:0', grad_fn=<AddBackward0>)


12it [00:03,  3.17it/s, loss=0.0179]

tensor(0.0179, device='cuda:0', grad_fn=<AddBackward0>)


13it [00:04,  3.22it/s, loss=0.0189]

tensor(0.0189, device='cuda:0', grad_fn=<AddBackward0>)


14it [00:04,  3.32it/s, loss=0.0198]

tensor(0.0198, device='cuda:0', grad_fn=<AddBackward0>)


15it [00:04,  3.31it/s, loss=0.0186]

tensor(0.0186, device='cuda:0', grad_fn=<AddBackward0>)


16it [00:04,  3.25it/s, loss=0.0192]

tensor(0.0192, device='cuda:0', grad_fn=<AddBackward0>)


17it [00:05,  3.15it/s, loss=0.0169]

tensor(0.0169, device='cuda:0', grad_fn=<AddBackward0>)


18it [00:05,  3.18it/s, loss=0.0163]

tensor(0.0163, device='cuda:0', grad_fn=<AddBackward0>)


19it [00:05,  3.19it/s, loss=0.0169]

tensor(0.0169, device='cuda:0', grad_fn=<AddBackward0>)


20it [00:06,  3.24it/s, loss=0.0187]

tensor(0.0187, device='cuda:0', grad_fn=<AddBackward0>)


21it [00:06,  3.24it/s, loss=0.0204]

tensor(0.0204, device='cuda:0', grad_fn=<AddBackward0>)


22it [00:06,  3.23it/s, loss=0.018] 

tensor(0.0180, device='cuda:0', grad_fn=<AddBackward0>)


23it [00:07,  3.14it/s, loss=0.0154]

tensor(0.0154, device='cuda:0', grad_fn=<AddBackward0>)


24it [00:07,  3.15it/s, loss=0.017] 

tensor(0.0170, device='cuda:0', grad_fn=<AddBackward0>)


25it [00:07,  3.20it/s, loss=0.0186]

tensor(0.0186, device='cuda:0', grad_fn=<AddBackward0>)


26it [00:08,  3.26it/s, loss=0.0187]

tensor(0.0187, device='cuda:0', grad_fn=<AddBackward0>)


27it [00:08,  3.28it/s, loss=0.0181]

tensor(0.0181, device='cuda:0', grad_fn=<AddBackward0>)


28it [00:08,  3.25it/s, loss=0.0185]

tensor(0.0185, device='cuda:0', grad_fn=<AddBackward0>)


29it [00:08,  3.28it/s, loss=0.018] 

tensor(0.0180, device='cuda:0', grad_fn=<AddBackward0>)


30it [00:09,  3.28it/s, loss=0.0169]

tensor(0.0169, device='cuda:0', grad_fn=<AddBackward0>)


31it [00:09,  3.15it/s, loss=0.018] 

tensor(0.0180, device='cuda:0', grad_fn=<AddBackward0>)


32it [00:09,  3.25it/s, loss=0.0176]

tensor(0.0176, device='cuda:0', grad_fn=<AddBackward0>)


33it [00:10,  3.32it/s, loss=0.0172]

tensor(0.0172, device='cuda:0', grad_fn=<AddBackward0>)


34it [00:10,  3.19it/s, loss=0.0168]

tensor(0.0168, device='cuda:0', grad_fn=<AddBackward0>)


35it [00:10,  3.09it/s, loss=0.0196]

tensor(0.0196, device='cuda:0', grad_fn=<AddBackward0>)


36it [00:11,  3.18it/s, loss=0.0166]

tensor(0.0166, device='cuda:0', grad_fn=<AddBackward0>)


37it [00:11,  3.02it/s, loss=0.0184]

tensor(0.0184, device='cuda:0', grad_fn=<AddBackward0>)


38it [00:11,  2.97it/s, loss=0.017] 

tensor(0.0170, device='cuda:0', grad_fn=<AddBackward0>)


39it [00:12,  3.02it/s, loss=0.0167]

tensor(0.0167, device='cuda:0', grad_fn=<AddBackward0>)


40it [00:12,  3.00it/s, loss=0.0168]

tensor(0.0168, device='cuda:0', grad_fn=<AddBackward0>)


41it [00:12,  2.99it/s, loss=0.0164]

tensor(0.0164, device='cuda:0', grad_fn=<AddBackward0>)


42it [00:13,  3.03it/s, loss=0.021] 

tensor(0.0210, device='cuda:0', grad_fn=<AddBackward0>)


43it [00:13,  3.07it/s, loss=0.0179]

tensor(0.0179, device='cuda:0', grad_fn=<AddBackward0>)


44it [00:13,  3.15it/s, loss=0.0207]

tensor(0.0207, device='cuda:0', grad_fn=<AddBackward0>)


45it [00:14,  3.18it/s, loss=0.0225]

tensor(0.0225, device='cuda:0', grad_fn=<AddBackward0>)


46it [00:14,  3.19it/s, loss=0.0209]

tensor(0.0209, device='cuda:0', grad_fn=<AddBackward0>)


47it [00:14,  3.18it/s, loss=0.0176]

tensor(0.0176, device='cuda:0', grad_fn=<AddBackward0>)


48it [00:15,  3.18it/s, loss=0.0187]

tensor(0.0187, device='cuda:0', grad_fn=<AddBackward0>)


49it [00:15,  3.11it/s, loss=0.0207]

tensor(0.0207, device='cuda:0', grad_fn=<AddBackward0>)


50it [00:15,  3.06it/s, loss=0.018] 

tensor(0.0180, device='cuda:0', grad_fn=<AddBackward0>)


51it [00:16,  3.09it/s, loss=0.0163]

tensor(0.0163, device='cuda:0', grad_fn=<AddBackward0>)


52it [00:16,  3.07it/s, loss=0.0171]

tensor(0.0171, device='cuda:0', grad_fn=<AddBackward0>)


53it [00:16,  2.97it/s, loss=0.0179]

tensor(0.0179, device='cuda:0', grad_fn=<AddBackward0>)


54it [00:17,  3.06it/s, loss=0.0178]

tensor(0.0178, device='cuda:0', grad_fn=<AddBackward0>)


55it [00:17,  3.11it/s, loss=0.0165]

tensor(0.0165, device='cuda:0', grad_fn=<AddBackward0>)


56it [00:17,  3.07it/s, loss=0.0166]

tensor(0.0166, device='cuda:0', grad_fn=<AddBackward0>)


57it [00:18,  3.08it/s, loss=0.0175]

tensor(0.0175, device='cuda:0', grad_fn=<AddBackward0>)


58it [00:18,  3.04it/s, loss=0.018] 

tensor(0.0180, device='cuda:0', grad_fn=<AddBackward0>)


59it [00:18,  3.02it/s, loss=0.0167]

tensor(0.0167, device='cuda:0', grad_fn=<AddBackward0>)


60it [00:19,  3.06it/s, loss=0.0169]

tensor(0.0169, device='cuda:0', grad_fn=<AddBackward0>)


61it [00:19,  3.08it/s, loss=0.0187]

tensor(0.0187, device='cuda:0', grad_fn=<AddBackward0>)


62it [00:19,  3.13it/s, loss=0.0149]

tensor(0.0149, device='cuda:0', grad_fn=<AddBackward0>)


63it [00:20,  3.11it/s, loss=0.0186]

tensor(0.0186, device='cuda:0', grad_fn=<AddBackward0>)


64it [00:20,  3.19it/s, loss=0.0192]

tensor(0.0192, device='cuda:0', grad_fn=<AddBackward0>)


65it [00:20,  3.11it/s, loss=0.0166]

tensor(0.0166, device='cuda:0', grad_fn=<AddBackward0>)


66it [00:20,  3.09it/s, loss=0.0168]

tensor(0.0168, device='cuda:0', grad_fn=<AddBackward0>)


67it [00:21,  3.01it/s, loss=0.0169]

tensor(0.0169, device='cuda:0', grad_fn=<AddBackward0>)


68it [00:21,  3.07it/s, loss=0.0188]

tensor(0.0188, device='cuda:0', grad_fn=<AddBackward0>)


69it [00:21,  3.09it/s, loss=0.017] 

tensor(0.0170, device='cuda:0', grad_fn=<AddBackward0>)


70it [00:22,  3.19it/s, loss=0.0176]

tensor(0.0176, device='cuda:0', grad_fn=<AddBackward0>)


71it [00:22,  3.13it/s, loss=0.0176]

tensor(0.0176, device='cuda:0', grad_fn=<AddBackward0>)


72it [00:22,  3.12it/s, loss=0.0182]

tensor(0.0182, device='cuda:0', grad_fn=<AddBackward0>)


73it [00:23,  3.18it/s, loss=0.0188]

tensor(0.0188, device='cuda:0', grad_fn=<AddBackward0>)


74it [00:23,  3.15it/s, loss=0.018] 

tensor(0.0180, device='cuda:0', grad_fn=<AddBackward0>)


75it [00:23,  2.99it/s, loss=0.0171]

tensor(0.0171, device='cuda:0', grad_fn=<AddBackward0>)


76it [00:24,  3.04it/s, loss=0.0185]

tensor(0.0185, device='cuda:0', grad_fn=<AddBackward0>)


77it [00:24,  3.03it/s, loss=0.0191]

tensor(0.0191, device='cuda:0', grad_fn=<AddBackward0>)


78it [00:24,  3.01it/s, loss=0.0168]

tensor(0.0168, device='cuda:0', grad_fn=<AddBackward0>)


79it [00:25,  3.08it/s, loss=0.0166]

tensor(0.0166, device='cuda:0', grad_fn=<AddBackward0>)


80it [00:25,  3.09it/s, loss=0.0151]

tensor(0.0151, device='cuda:0', grad_fn=<AddBackward0>)


81it [00:25,  3.07it/s, loss=0.0168]

tensor(0.0168, device='cuda:0', grad_fn=<AddBackward0>)


82it [00:26,  3.15it/s, loss=0.0162]

tensor(0.0162, device='cuda:0', grad_fn=<AddBackward0>)


83it [00:26,  3.06it/s, loss=0.017] 

tensor(0.0170, device='cuda:0', grad_fn=<AddBackward0>)


84it [00:26,  3.02it/s, loss=0.0169]

tensor(0.0169, device='cuda:0', grad_fn=<AddBackward0>)


85it [00:27,  3.03it/s, loss=0.0174]

tensor(0.0174, device='cuda:0', grad_fn=<AddBackward0>)


86it [00:27,  3.12it/s, loss=0.016] 

tensor(0.0160, device='cuda:0', grad_fn=<AddBackward0>)


87it [00:27,  3.05it/s, loss=0.0164]

tensor(0.0164, device='cuda:0', grad_fn=<AddBackward0>)


88it [00:28,  3.11it/s, loss=0.0147]

tensor(0.0147, device='cuda:0', grad_fn=<AddBackward0>)


89it [00:28,  3.07it/s, loss=0.0164]

tensor(0.0164, device='cuda:0', grad_fn=<AddBackward0>)


90it [00:28,  3.10it/s, loss=0.0164]

tensor(0.0164, device='cuda:0', grad_fn=<AddBackward0>)


91it [00:29,  3.05it/s, loss=0.0154]

tensor(0.0154, device='cuda:0', grad_fn=<AddBackward0>)


92it [00:29,  3.09it/s, loss=0.0186]

tensor(0.0186, device='cuda:0', grad_fn=<AddBackward0>)


93it [00:29,  3.01it/s, loss=0.017] 

tensor(0.0170, device='cuda:0', grad_fn=<AddBackward0>)


94it [00:30,  3.05it/s, loss=0.0166]

tensor(0.0166, device='cuda:0', grad_fn=<AddBackward0>)


95it [00:30,  3.03it/s, loss=0.0157]

tensor(0.0157, device='cuda:0', grad_fn=<AddBackward0>)


96it [00:30,  3.02it/s, loss=0.0178]

tensor(0.0178, device='cuda:0', grad_fn=<AddBackward0>)


97it [00:31,  2.94it/s, loss=0.0153]

tensor(0.0153, device='cuda:0', grad_fn=<AddBackward0>)


98it [00:31,  2.97it/s, loss=0.0164]

tensor(0.0164, device='cuda:0', grad_fn=<AddBackward0>)


99it [00:31,  2.93it/s, loss=0.0161]

tensor(0.0161, device='cuda:0', grad_fn=<AddBackward0>)


100it [00:32,  3.03it/s, loss=0.0174]

tensor(0.0174, device='cuda:0', grad_fn=<AddBackward0>)


101it [00:32,  3.12it/s, loss=0.0156]

tensor(0.0156, device='cuda:0', grad_fn=<AddBackward0>)


102it [00:32,  3.11it/s, loss=0.0154]

tensor(0.0154, device='cuda:0', grad_fn=<AddBackward0>)


103it [00:33,  3.06it/s, loss=0.0157]

tensor(0.0157, device='cuda:0', grad_fn=<AddBackward0>)


104it [00:33,  3.06it/s, loss=0.0159]

tensor(0.0159, device='cuda:0', grad_fn=<AddBackward0>)


105it [00:33,  3.04it/s, loss=0.0172]

tensor(0.0172, device='cuda:0', grad_fn=<AddBackward0>)


106it [00:34,  3.14it/s, loss=0.0155]

tensor(0.0155, device='cuda:0', grad_fn=<AddBackward0>)


107it [00:34,  3.11it/s, loss=0.0164]

tensor(0.0164, device='cuda:0', grad_fn=<AddBackward0>)


108it [00:34,  3.05it/s, loss=0.0159]

tensor(0.0159, device='cuda:0', grad_fn=<AddBackward0>)


109it [00:35,  3.03it/s, loss=0.0164]

tensor(0.0164, device='cuda:0', grad_fn=<AddBackward0>)


110it [00:35,  3.03it/s, loss=0.0149]

tensor(0.0149, device='cuda:0', grad_fn=<AddBackward0>)


111it [00:35,  3.01it/s, loss=0.0163]

tensor(0.0163, device='cuda:0', grad_fn=<AddBackward0>)


112it [00:35,  3.09it/s, loss=0.0152]

tensor(0.0152, device='cuda:0', grad_fn=<AddBackward0>)


113it [00:36,  3.07it/s, loss=0.017] 

tensor(0.0170, device='cuda:0', grad_fn=<AddBackward0>)


114it [00:36,  3.06it/s, loss=0.0167]

tensor(0.0167, device='cuda:0', grad_fn=<AddBackward0>)


115it [00:36,  3.01it/s, loss=0.016] 

tensor(0.0160, device='cuda:0', grad_fn=<AddBackward0>)


116it [00:37,  3.08it/s, loss=0.0155]

tensor(0.0155, device='cuda:0', grad_fn=<AddBackward0>)


117it [00:37,  3.10it/s, loss=0.0165]

tensor(0.0165, device='cuda:0', grad_fn=<AddBackward0>)


118it [00:37,  3.13it/s, loss=0.015] 

tensor(0.0150, device='cuda:0', grad_fn=<AddBackward0>)


119it [00:38,  3.21it/s, loss=0.0162]

tensor(0.0162, device='cuda:0', grad_fn=<AddBackward0>)


120it [00:38,  3.11it/s, loss=0.0152]

tensor(0.0152, device='cuda:0', grad_fn=<AddBackward0>)


121it [00:38,  3.04it/s, loss=0.0144]

tensor(0.0144, device='cuda:0', grad_fn=<AddBackward0>)


122it [00:39,  3.13it/s, loss=0.0157]

tensor(0.0157, device='cuda:0', grad_fn=<AddBackward0>)


123it [00:39,  3.13it/s, loss=0.0153]

tensor(0.0153, device='cuda:0', grad_fn=<AddBackward0>)


124it [00:39,  3.14it/s, loss=0.0145]

tensor(0.0145, device='cuda:0', grad_fn=<AddBackward0>)


125it [00:40,  3.16it/s, loss=0.0152]

tensor(0.0152, device='cuda:0', grad_fn=<AddBackward0>)


126it [00:40,  3.22it/s, loss=0.0162]

tensor(0.0162, device='cuda:0', grad_fn=<AddBackward0>)


127it [00:40,  3.10it/s, loss=0.0142]

tensor(0.0142, device='cuda:0', grad_fn=<AddBackward0>)


128it [00:41,  3.10it/s, loss=0.0148]

tensor(0.0148, device='cuda:0', grad_fn=<AddBackward0>)


129it [00:41,  3.06it/s, loss=0.0136]

tensor(0.0136, device='cuda:0', grad_fn=<AddBackward0>)


130it [00:41,  3.10it/s, loss=0.0155]

tensor(0.0155, device='cuda:0', grad_fn=<AddBackward0>)


131it [00:42,  3.14it/s, loss=0.0165]

tensor(0.0165, device='cuda:0', grad_fn=<AddBackward0>)


132it [00:42,  3.16it/s, loss=0.0135]

tensor(0.0135, device='cuda:0', grad_fn=<AddBackward0>)


133it [00:42,  3.14it/s, loss=0.017] 

tensor(0.0170, device='cuda:0', grad_fn=<AddBackward0>)


134it [00:43,  3.20it/s, loss=0.0157]

tensor(0.0157, device='cuda:0', grad_fn=<AddBackward0>)


135it [00:43,  3.06it/s, loss=0.0158]

tensor(0.0158, device='cuda:0', grad_fn=<AddBackward0>)


136it [00:43,  3.10it/s, loss=0.0138]

tensor(0.0138, device='cuda:0', grad_fn=<AddBackward0>)


137it [00:44,  3.10it/s, loss=0.0157]

tensor(0.0157, device='cuda:0', grad_fn=<AddBackward0>)


138it [00:44,  3.15it/s, loss=0.0151]

tensor(0.0151, device='cuda:0', grad_fn=<AddBackward0>)


139it [00:44,  3.10it/s, loss=0.0152]

tensor(0.0152, device='cuda:0', grad_fn=<AddBackward0>)


140it [00:45,  3.02it/s, loss=0.0153]

tensor(0.0153, device='cuda:0', grad_fn=<AddBackward0>)


141it [00:45,  3.07it/s, loss=0.0151]

tensor(0.0151, device='cuda:0', grad_fn=<AddBackward0>)


142it [00:45,  3.18it/s, loss=0.0136]

tensor(0.0136, device='cuda:0', grad_fn=<AddBackward0>)


143it [00:45,  3.05it/s, loss=0.0152]

tensor(0.0152, device='cuda:0', grad_fn=<AddBackward0>)


144it [00:46,  3.10it/s, loss=0.016] 

tensor(0.0160, device='cuda:0', grad_fn=<AddBackward0>)


145it [00:46,  3.08it/s, loss=0.0136]

tensor(0.0136, device='cuda:0', grad_fn=<AddBackward0>)


146it [00:46,  3.07it/s, loss=0.0143]

tensor(0.0143, device='cuda:0', grad_fn=<AddBackward0>)


147it [00:47,  3.08it/s, loss=0.0148]

tensor(0.0148, device='cuda:0', grad_fn=<AddBackward0>)


148it [00:47,  3.12it/s, loss=0.0158]

tensor(0.0158, device='cuda:0', grad_fn=<AddBackward0>)


149it [00:47,  3.00it/s, loss=0.0148]

tensor(0.0148, device='cuda:0', grad_fn=<AddBackward0>)


150it [00:48,  3.12it/s, loss=0.014] 

tensor(0.0140, device='cuda:0', grad_fn=<AddBackward0>)


151it [00:48,  3.13it/s, loss=0.016]

tensor(0.0160, device='cuda:0', grad_fn=<AddBackward0>)


152it [00:48,  3.15it/s, loss=0.015]

tensor(0.0150, device='cuda:0', grad_fn=<AddBackward0>)


153it [00:49,  3.20it/s, loss=0.0151]

tensor(0.0151, device='cuda:0', grad_fn=<AddBackward0>)


154it [00:49,  3.16it/s, loss=0.014] 

tensor(0.0140, device='cuda:0', grad_fn=<AddBackward0>)


155it [00:49,  3.09it/s, loss=0.0143]

tensor(0.0143, device='cuda:0', grad_fn=<AddBackward0>)


156it [00:50,  3.13it/s, loss=0.0154]

tensor(0.0154, device='cuda:0', grad_fn=<AddBackward0>)


157it [00:50,  3.07it/s, loss=0.0166]

tensor(0.0166, device='cuda:0', grad_fn=<AddBackward0>)


158it [00:50,  3.15it/s, loss=0.0149]

tensor(0.0149, device='cuda:0', grad_fn=<AddBackward0>)


159it [00:51,  3.10it/s, loss=0.0157]

tensor(0.0157, device='cuda:0', grad_fn=<AddBackward0>)


160it [00:51,  3.10it/s, loss=0.0194]

tensor(0.0194, device='cuda:0', grad_fn=<AddBackward0>)


161it [00:51,  2.96it/s, loss=0.0175]

tensor(0.0175, device='cuda:0', grad_fn=<AddBackward0>)


162it [00:52,  3.04it/s, loss=0.0184]

tensor(0.0184, device='cuda:0', grad_fn=<AddBackward0>)


163it [00:52,  3.09it/s, loss=0.016] 

tensor(0.0160, device='cuda:0', grad_fn=<AddBackward0>)


164it [00:52,  3.09it/s, loss=0.0167]

tensor(0.0167, device='cuda:0', grad_fn=<AddBackward0>)


165it [00:53,  3.06it/s, loss=0.0163]

tensor(0.0163, device='cuda:0', grad_fn=<AddBackward0>)


166it [00:53,  3.09it/s, loss=0.0151]

tensor(0.0151, device='cuda:0', grad_fn=<AddBackward0>)


167it [00:53,  3.15it/s, loss=0.0158]

tensor(0.0158, device='cuda:0', grad_fn=<AddBackward0>)


168it [00:54,  3.12it/s, loss=0.0143]

tensor(0.0143, device='cuda:0', grad_fn=<AddBackward0>)


169it [00:54,  3.04it/s, loss=0.0146]

tensor(0.0146, device='cuda:0', grad_fn=<AddBackward0>)


170it [00:54,  3.08it/s, loss=0.0161]

tensor(0.0161, device='cuda:0', grad_fn=<AddBackward0>)


171it [00:55,  3.07it/s, loss=0.0159]

tensor(0.0159, device='cuda:0', grad_fn=<AddBackward0>)


172it [00:55,  3.07it/s, loss=0.0147]

tensor(0.0147, device='cuda:0', grad_fn=<AddBackward0>)


173it [00:55,  2.99it/s, loss=0.0154]

tensor(0.0154, device='cuda:0', grad_fn=<AddBackward0>)


174it [00:56,  3.03it/s, loss=0.015] 

tensor(0.0150, device='cuda:0', grad_fn=<AddBackward0>)


175it [00:56,  3.01it/s, loss=0.0137]

tensor(0.0137, device='cuda:0', grad_fn=<AddBackward0>)


176it [00:56,  2.97it/s, loss=0.0144]

tensor(0.0144, device='cuda:0', grad_fn=<AddBackward0>)


177it [00:57,  2.96it/s, loss=0.0157]

tensor(0.0157, device='cuda:0', grad_fn=<AddBackward0>)


178it [00:57,  3.00it/s, loss=0.0154]

tensor(0.0154, device='cuda:0', grad_fn=<AddBackward0>)


179it [00:57,  3.03it/s, loss=0.0152]

tensor(0.0152, device='cuda:0', grad_fn=<AddBackward0>)


180it [00:57,  3.12it/s, loss=0.0164]

tensor(0.0164, device='cuda:0', grad_fn=<AddBackward0>)


181it [00:58,  3.08it/s, loss=0.0149]

tensor(0.0149, device='cuda:0', grad_fn=<AddBackward0>)


182it [00:58,  3.04it/s, loss=0.0148]

tensor(0.0148, device='cuda:0', grad_fn=<AddBackward0>)


183it [00:58,  3.10it/s, loss=0.0137]

tensor(0.0137, device='cuda:0', grad_fn=<AddBackward0>)


184it [00:59,  3.18it/s, loss=0.0148]

tensor(0.0148, device='cuda:0', grad_fn=<AddBackward0>)


185it [00:59,  3.17it/s, loss=0.0159]

tensor(0.0159, device='cuda:0', grad_fn=<AddBackward0>)


186it [00:59,  3.17it/s, loss=0.0135]

tensor(0.0135, device='cuda:0', grad_fn=<AddBackward0>)


187it [01:00,  3.15it/s, loss=0.0145]

tensor(0.0145, device='cuda:0', grad_fn=<AddBackward0>)


188it [01:00,  3.15it/s, loss=0.0138]

tensor(0.0138, device='cuda:0', grad_fn=<AddBackward0>)


189it [01:00,  3.00it/s, loss=0.0139]

tensor(0.0139, device='cuda:0', grad_fn=<AddBackward0>)


190it [01:01,  3.07it/s, loss=0.0145]

tensor(0.0145, device='cuda:0', grad_fn=<AddBackward0>)


191it [01:01,  3.13it/s, loss=0.0151]

tensor(0.0151, device='cuda:0', grad_fn=<AddBackward0>)


192it [01:01,  3.11it/s, loss=0.0164]

tensor(0.0164, device='cuda:0', grad_fn=<AddBackward0>)


193it [01:02,  3.07it/s, loss=0.0133]

tensor(0.0133, device='cuda:0', grad_fn=<AddBackward0>)


194it [01:02,  3.14it/s, loss=0.0163]

tensor(0.0163, device='cuda:0', grad_fn=<AddBackward0>)


195it [01:02,  3.14it/s, loss=0.0143]

tensor(0.0143, device='cuda:0', grad_fn=<AddBackward0>)


196it [01:03,  3.15it/s, loss=0.0148]

tensor(0.0148, device='cuda:0', grad_fn=<AddBackward0>)


197it [01:03,  3.21it/s, loss=0.0138]

tensor(0.0138, device='cuda:0', grad_fn=<AddBackward0>)


198it [01:03,  3.16it/s, loss=0.0145]

tensor(0.0145, device='cuda:0', grad_fn=<AddBackward0>)


199it [01:04,  3.17it/s, loss=0.014] 

tensor(0.0140, device='cuda:0', grad_fn=<AddBackward0>)


200it [01:04,  3.10it/s, loss=0.0137]

tensor(0.0137, device='cuda:0', grad_fn=<AddBackward0>)


201it [01:04,  2.96it/s, loss=0.014] 

tensor(0.0140, device='cuda:0', grad_fn=<AddBackward0>)


202it [01:05,  3.02it/s, loss=0.0136]

tensor(0.0136, device='cuda:0', grad_fn=<AddBackward0>)


203it [01:05,  3.03it/s, loss=0.0162]

tensor(0.0162, device='cuda:0', grad_fn=<AddBackward0>)


204it [01:05,  3.11it/s, loss=0.0155]

tensor(0.0155, device='cuda:0', grad_fn=<AddBackward0>)


205it [01:06,  3.14it/s, loss=0.0135]

tensor(0.0135, device='cuda:0', grad_fn=<AddBackward0>)


206it [01:06,  3.13it/s, loss=0.0145]

tensor(0.0145, device='cuda:0', grad_fn=<AddBackward0>)


207it [01:06,  3.11it/s, loss=0.0151]

tensor(0.0151, device='cuda:0', grad_fn=<AddBackward0>)


208it [01:06,  3.17it/s, loss=0.0134]

tensor(0.0134, device='cuda:0', grad_fn=<AddBackward0>)


209it [01:07,  3.09it/s, loss=0.0133]

tensor(0.0133, device='cuda:0', grad_fn=<AddBackward0>)


210it [01:07,  3.09it/s, loss=0.0139]

tensor(0.0139, device='cuda:0', grad_fn=<AddBackward0>)


211it [01:07,  3.13it/s, loss=0.0128]

tensor(0.0128, device='cuda:0', grad_fn=<AddBackward0>)


212it [01:08,  3.14it/s, loss=0.0146]

tensor(0.0146, device='cuda:0', grad_fn=<AddBackward0>)


213it [01:08,  3.12it/s, loss=0.0139]

tensor(0.0139, device='cuda:0', grad_fn=<AddBackward0>)


214it [01:08,  3.10it/s, loss=0.0144]

tensor(0.0144, device='cuda:0', grad_fn=<AddBackward0>)


215it [01:09,  3.11it/s, loss=0.0151]

tensor(0.0151, device='cuda:0', grad_fn=<AddBackward0>)


216it [01:09,  3.05it/s, loss=0.0146]

tensor(0.0146, device='cuda:0', grad_fn=<AddBackward0>)


217it [01:09,  2.97it/s, loss=0.0132]

tensor(0.0132, device='cuda:0', grad_fn=<AddBackward0>)


218it [01:10,  3.08it/s, loss=0.0142]

tensor(0.0142, device='cuda:0', grad_fn=<AddBackward0>)


219it [01:10,  3.12it/s, loss=0.0152]

tensor(0.0152, device='cuda:0', grad_fn=<AddBackward0>)


220it [01:10,  3.10it/s, loss=0.0131]

tensor(0.0131, device='cuda:0', grad_fn=<AddBackward0>)


221it [01:11,  3.12it/s, loss=0.0142]

tensor(0.0142, device='cuda:0', grad_fn=<AddBackward0>)


222it [01:11,  3.20it/s, loss=0.0144]

tensor(0.0144, device='cuda:0', grad_fn=<AddBackward0>)


223it [01:11,  3.16it/s, loss=0.0145]

tensor(0.0145, device='cuda:0', grad_fn=<AddBackward0>)


224it [01:12,  3.21it/s, loss=0.0136]

tensor(0.0136, device='cuda:0', grad_fn=<AddBackward0>)


225it [01:12,  3.08it/s, loss=0.0152]

tensor(0.0152, device='cuda:0', grad_fn=<AddBackward0>)


226it [01:12,  3.10it/s, loss=0.0153]

tensor(0.0153, device='cuda:0', grad_fn=<AddBackward0>)


227it [01:13,  3.04it/s, loss=0.0146]

tensor(0.0146, device='cuda:0', grad_fn=<AddBackward0>)


228it [01:13,  3.07it/s, loss=0.0132]

tensor(0.0132, device='cuda:0', grad_fn=<AddBackward0>)


229it [01:13,  3.02it/s, loss=0.0137]

tensor(0.0137, device='cuda:0', grad_fn=<AddBackward0>)


230it [01:14,  3.04it/s, loss=0.0145]

tensor(0.0145, device='cuda:0', grad_fn=<AddBackward0>)


231it [01:14,  3.04it/s, loss=0.0145]

tensor(0.0145, device='cuda:0', grad_fn=<AddBackward0>)


232it [01:14,  3.12it/s, loss=0.014] 

tensor(0.0140, device='cuda:0', grad_fn=<AddBackward0>)


233it [01:15,  3.12it/s, loss=0.0145]

tensor(0.0145, device='cuda:0', grad_fn=<AddBackward0>)


234it [01:15,  3.13it/s, loss=0.0135]

tensor(0.0135, device='cuda:0', grad_fn=<AddBackward0>)


235it [01:15,  3.01it/s, loss=0.015] 

tensor(0.0150, device='cuda:0', grad_fn=<AddBackward0>)


236it [01:16,  3.07it/s, loss=0.0151]

tensor(0.0151, device='cuda:0', grad_fn=<AddBackward0>)


237it [01:16,  3.04it/s, loss=0.0141]

tensor(0.0141, device='cuda:0', grad_fn=<AddBackward0>)


238it [01:16,  3.03it/s, loss=0.0151]

tensor(0.0151, device='cuda:0', grad_fn=<AddBackward0>)


239it [01:17,  3.06it/s, loss=0.0132]

tensor(0.0132, device='cuda:0', grad_fn=<AddBackward0>)


240it [01:17,  3.08it/s, loss=0.0152]

tensor(0.0152, device='cuda:0', grad_fn=<AddBackward0>)


241it [01:17,  3.04it/s, loss=0.015] 

tensor(0.0150, device='cuda:0', grad_fn=<AddBackward0>)


242it [01:18,  3.07it/s, loss=0.0129]

tensor(0.0129, device='cuda:0', grad_fn=<AddBackward0>)


243it [01:18,  3.08it/s, loss=0.0117]

tensor(0.0117, device='cuda:0', grad_fn=<AddBackward0>)


244it [01:18,  3.13it/s, loss=0.0141]

tensor(0.0141, device='cuda:0', grad_fn=<AddBackward0>)


245it [01:18,  3.04it/s, loss=0.0154]

tensor(0.0154, device='cuda:0', grad_fn=<AddBackward0>)


246it [01:19,  3.12it/s, loss=0.0139]

tensor(0.0139, device='cuda:0', grad_fn=<AddBackward0>)


247it [01:19,  3.11it/s, loss=0.0137]

tensor(0.0137, device='cuda:0', grad_fn=<AddBackward0>)


248it [01:19,  3.08it/s, loss=0.0149]

tensor(0.0149, device='cuda:0', grad_fn=<AddBackward0>)


249it [01:20,  3.12it/s, loss=0.0149]

tensor(0.0149, device='cuda:0', grad_fn=<AddBackward0>)


250it [01:20,  3.13it/s, loss=0.0122]

tensor(0.0122, device='cuda:0', grad_fn=<AddBackward0>)


251it [01:20,  3.05it/s, loss=0.0141]

tensor(0.0141, device='cuda:0', grad_fn=<AddBackward0>)


252it [01:21,  3.09it/s, loss=0.0137]

tensor(0.0137, device='cuda:0', grad_fn=<AddBackward0>)


253it [01:21,  3.01it/s, loss=0.0135]

tensor(0.0135, device='cuda:0', grad_fn=<AddBackward0>)


254it [01:21,  3.11it/s, loss=0.0159]

tensor(0.0159, device='cuda:0', grad_fn=<AddBackward0>)


255it [01:22,  3.16it/s, loss=0.0137]

tensor(0.0137, device='cuda:0', grad_fn=<AddBackward0>)


256it [01:22,  3.17it/s, loss=0.0133]

tensor(0.0133, device='cuda:0', grad_fn=<AddBackward0>)


257it [01:22,  3.15it/s, loss=0.0143]

tensor(0.0143, device='cuda:0', grad_fn=<AddBackward0>)


258it [01:23,  3.10it/s, loss=0.0148]

tensor(0.0148, device='cuda:0', grad_fn=<AddBackward0>)


259it [01:23,  3.09it/s, loss=0.0136]

tensor(0.0136, device='cuda:0', grad_fn=<AddBackward0>)


260it [01:23,  2.99it/s, loss=0.0134]

tensor(0.0134, device='cuda:0', grad_fn=<AddBackward0>)


261it [01:24,  3.04it/s, loss=0.0136]

tensor(0.0136, device='cuda:0', grad_fn=<AddBackward0>)


262it [01:24,  3.11it/s, loss=0.0143]

tensor(0.0143, device='cuda:0', grad_fn=<AddBackward0>)


263it [01:24,  3.07it/s, loss=0.0137]

tensor(0.0137, device='cuda:0', grad_fn=<AddBackward0>)


264it [01:25,  3.10it/s, loss=0.0145]

tensor(0.0145, device='cuda:0', grad_fn=<AddBackward0>)


265it [01:25,  3.11it/s, loss=0.0142]

tensor(0.0142, device='cuda:0', grad_fn=<AddBackward0>)


266it [01:25,  3.06it/s, loss=0.0151]

tensor(0.0151, device='cuda:0', grad_fn=<AddBackward0>)


267it [01:26,  2.93it/s, loss=0.013] 

tensor(0.0130, device='cuda:0', grad_fn=<AddBackward0>)


268it [01:26,  3.01it/s, loss=0.0141]

tensor(0.0141, device='cuda:0', grad_fn=<AddBackward0>)


269it [01:26,  3.03it/s, loss=0.0135]

tensor(0.0135, device='cuda:0', grad_fn=<AddBackward0>)


270it [01:27,  3.05it/s, loss=0.0138]

tensor(0.0138, device='cuda:0', grad_fn=<AddBackward0>)


271it [01:27,  3.05it/s, loss=0.0143]

tensor(0.0143, device='cuda:0', grad_fn=<AddBackward0>)


272it [01:27,  3.09it/s, loss=0.0139]

tensor(0.0139, device='cuda:0', grad_fn=<AddBackward0>)


273it [01:28,  3.06it/s, loss=0.0126]

tensor(0.0126, device='cuda:0', grad_fn=<AddBackward0>)


274it [01:28,  3.14it/s, loss=0.0132]

tensor(0.0132, device='cuda:0', grad_fn=<AddBackward0>)


275it [01:28,  2.87it/s, loss=0.0128]

tensor(0.0128, device='cuda:0', grad_fn=<AddBackward0>)


276it [01:29,  2.94it/s, loss=0.0131]

tensor(0.0131, device='cuda:0', grad_fn=<AddBackward0>)


277it [01:29,  2.88it/s, loss=0.0146]

tensor(0.0146, device='cuda:0', grad_fn=<AddBackward0>)


278it [01:29,  2.92it/s, loss=0.0125]

tensor(0.0125, device='cuda:0', grad_fn=<AddBackward0>)


279it [01:30,  2.92it/s, loss=0.0142]

tensor(0.0142, device='cuda:0', grad_fn=<AddBackward0>)


280it [01:30,  2.90it/s, loss=0.0122]

tensor(0.0122, device='cuda:0', grad_fn=<AddBackward0>)


281it [01:30,  2.94it/s, loss=0.0138]

tensor(0.0138, device='cuda:0', grad_fn=<AddBackward0>)


282it [01:31,  3.03it/s, loss=0.0139]

tensor(0.0139, device='cuda:0', grad_fn=<AddBackward0>)


283it [01:31,  3.02it/s, loss=0.0123]

tensor(0.0123, device='cuda:0', grad_fn=<AddBackward0>)


284it [01:31,  3.02it/s, loss=0.0129]

tensor(0.0129, device='cuda:0', grad_fn=<AddBackward0>)


285it [01:32,  3.05it/s, loss=0.0135]

tensor(0.0135, device='cuda:0', grad_fn=<AddBackward0>)


286it [01:32,  3.12it/s, loss=0.0136]

tensor(0.0136, device='cuda:0', grad_fn=<AddBackward0>)


287it [01:32,  3.04it/s, loss=0.0132]

tensor(0.0132, device='cuda:0', grad_fn=<AddBackward0>)


288it [01:33,  3.01it/s, loss=0.0136]

tensor(0.0136, device='cuda:0', grad_fn=<AddBackward0>)


289it [01:33,  3.07it/s, loss=0.0131]

tensor(0.0131, device='cuda:0', grad_fn=<AddBackward0>)


290it [01:33,  3.08it/s, loss=0.0137]

tensor(0.0137, device='cuda:0', grad_fn=<AddBackward0>)


291it [01:34,  3.16it/s, loss=0.0132]

tensor(0.0132, device='cuda:0', grad_fn=<AddBackward0>)


292it [01:34,  3.18it/s, loss=0.0137]

tensor(0.0137, device='cuda:0', grad_fn=<AddBackward0>)


293it [01:34,  3.09it/s, loss=0.0125]

tensor(0.0125, device='cuda:0', grad_fn=<AddBackward0>)


294it [01:35,  3.13it/s, loss=0.014] 

tensor(0.0140, device='cuda:0', grad_fn=<AddBackward0>)


295it [01:35,  3.11it/s, loss=0.0136]

tensor(0.0136, device='cuda:0', grad_fn=<AddBackward0>)


296it [01:35,  3.15it/s, loss=0.0127]

tensor(0.0127, device='cuda:0', grad_fn=<AddBackward0>)


297it [01:35,  3.20it/s, loss=0.0125]

tensor(0.0125, device='cuda:0', grad_fn=<AddBackward0>)


298it [01:36,  3.29it/s, loss=0.0127]

tensor(0.0127, device='cuda:0', grad_fn=<AddBackward0>)


299it [01:36,  3.22it/s, loss=0.0133]

tensor(0.0133, device='cuda:0', grad_fn=<AddBackward0>)


300it [01:36,  3.14it/s, loss=0.013] 

tensor(0.0130, device='cuda:0', grad_fn=<AddBackward0>)


301it [01:37,  3.18it/s, loss=0.012]

tensor(0.0120, device='cuda:0', grad_fn=<AddBackward0>)


302it [01:37,  3.18it/s, loss=0.0127]

tensor(0.0127, device='cuda:0', grad_fn=<AddBackward0>)


303it [01:37,  3.12it/s, loss=0.0148]

tensor(0.0148, device='cuda:0', grad_fn=<AddBackward0>)


304it [01:38,  3.17it/s, loss=0.0122]

tensor(0.0122, device='cuda:0', grad_fn=<AddBackward0>)


305it [01:38,  3.13it/s, loss=0.0129]

tensor(0.0129, device='cuda:0', grad_fn=<AddBackward0>)


306it [01:38,  3.17it/s, loss=0.0134]

tensor(0.0134, device='cuda:0', grad_fn=<AddBackward0>)


307it [01:39,  3.20it/s, loss=0.0134]

tensor(0.0134, device='cuda:0', grad_fn=<AddBackward0>)


308it [01:39,  3.30it/s, loss=0.0119]

tensor(0.0119, device='cuda:0', grad_fn=<AddBackward0>)


309it [01:39,  3.24it/s, loss=0.014] 

tensor(0.0140, device='cuda:0', grad_fn=<AddBackward0>)


310it [01:40,  3.21it/s, loss=0.0135]

tensor(0.0135, device='cuda:0', grad_fn=<AddBackward0>)


311it [01:40,  3.12it/s, loss=0.0129]

tensor(0.0129, device='cuda:0', grad_fn=<AddBackward0>)


313it [01:40,  3.75it/s, loss=0.0134]

tensor(0.0152, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.0134, device='cuda:0', grad_fn=<AddBackward0>)


313it [01:40,  3.10it/s, loss=0.0134]

Epoch 3 of 400



1it [00:00,  2.66it/s, loss=0.012]

tensor(0.0120, device='cuda:0', grad_fn=<AddBackward0>)


2it [00:00,  3.08it/s, loss=0.0131]

tensor(0.0131, device='cuda:0', grad_fn=<AddBackward0>)


3it [00:00,  3.09it/s, loss=0.0121]

tensor(0.0121, device='cuda:0', grad_fn=<AddBackward0>)


4it [00:01,  3.29it/s, loss=0.0125]

tensor(0.0125, device='cuda:0', grad_fn=<AddBackward0>)


5it [00:01,  3.26it/s, loss=0.013] 

tensor(0.0130, device='cuda:0', grad_fn=<AddBackward0>)


6it [00:01,  3.29it/s, loss=0.0126]

tensor(0.0126, device='cuda:0', grad_fn=<AddBackward0>)


7it [00:02,  3.27it/s, loss=0.0151]

tensor(0.0151, device='cuda:0', grad_fn=<AddBackward0>)


8it [00:02,  3.37it/s, loss=0.0123]

tensor(0.0123, device='cuda:0', grad_fn=<AddBackward0>)


9it [00:02,  3.34it/s, loss=0.0141]

tensor(0.0141, device='cuda:0', grad_fn=<AddBackward0>)


10it [00:03,  3.22it/s, loss=0.0128]

tensor(0.0128, device='cuda:0', grad_fn=<AddBackward0>)


11it [00:03,  3.23it/s, loss=0.0129]

tensor(0.0129, device='cuda:0', grad_fn=<AddBackward0>)


12it [00:03,  3.32it/s, loss=0.0127]

tensor(0.0127, device='cuda:0', grad_fn=<AddBackward0>)


13it [00:03,  3.33it/s, loss=0.0123]

tensor(0.0123, device='cuda:0', grad_fn=<AddBackward0>)


14it [00:04,  3.38it/s, loss=0.0124]

tensor(0.0124, device='cuda:0', grad_fn=<AddBackward0>)


15it [00:04,  3.35it/s, loss=0.0127]

tensor(0.0127, device='cuda:0', grad_fn=<AddBackward0>)


16it [00:04,  3.42it/s, loss=0.0126]

tensor(0.0126, device='cuda:0', grad_fn=<AddBackward0>)


17it [00:05,  3.37it/s, loss=0.0132]

tensor(0.0132, device='cuda:0', grad_fn=<AddBackward0>)


18it [00:05,  3.36it/s, loss=0.0135]

tensor(0.0135, device='cuda:0', grad_fn=<AddBackward0>)


19it [00:05,  3.29it/s, loss=0.0133]

tensor(0.0133, device='cuda:0', grad_fn=<AddBackward0>)


20it [00:06,  3.33it/s, loss=0.0142]

tensor(0.0142, device='cuda:0', grad_fn=<AddBackward0>)


21it [00:06,  3.33it/s, loss=0.0119]

tensor(0.0119, device='cuda:0', grad_fn=<AddBackward0>)


22it [00:06,  3.33it/s, loss=0.0134]

tensor(0.0134, device='cuda:0', grad_fn=<AddBackward0>)


23it [00:06,  3.25it/s, loss=0.0124]

tensor(0.0124, device='cuda:0', grad_fn=<AddBackward0>)


24it [00:07,  3.31it/s, loss=0.0124]

tensor(0.0124, device='cuda:0', grad_fn=<AddBackward0>)


25it [00:07,  3.26it/s, loss=0.0126]

tensor(0.0126, device='cuda:0', grad_fn=<AddBackward0>)


26it [00:07,  3.32it/s, loss=0.0133]

tensor(0.0133, device='cuda:0', grad_fn=<AddBackward0>)


27it [00:08,  3.32it/s, loss=0.0136]

tensor(0.0136, device='cuda:0', grad_fn=<AddBackward0>)


28it [00:08,  3.35it/s, loss=0.0125]

tensor(0.0125, device='cuda:0', grad_fn=<AddBackward0>)


29it [00:08,  3.25it/s, loss=0.0127]

tensor(0.0127, device='cuda:0', grad_fn=<AddBackward0>)


30it [00:09,  3.30it/s, loss=0.0138]

tensor(0.0138, device='cuda:0', grad_fn=<AddBackward0>)


31it [00:09,  3.28it/s, loss=0.0125]

tensor(0.0125, device='cuda:0', grad_fn=<AddBackward0>)


32it [00:09,  3.23it/s, loss=0.0127]

tensor(0.0127, device='cuda:0', grad_fn=<AddBackward0>)


33it [00:10,  3.21it/s, loss=0.0151]

tensor(0.0151, device='cuda:0', grad_fn=<AddBackward0>)


34it [00:10,  3.14it/s, loss=0.0132]

tensor(0.0132, device='cuda:0', grad_fn=<AddBackward0>)


35it [00:10,  3.06it/s, loss=0.0136]

tensor(0.0136, device='cuda:0', grad_fn=<AddBackward0>)


36it [00:11,  3.16it/s, loss=0.013] 

tensor(0.0130, device='cuda:0', grad_fn=<AddBackward0>)


37it [00:11,  3.08it/s, loss=0.0132]

tensor(0.0132, device='cuda:0', grad_fn=<AddBackward0>)


38it [00:11,  3.12it/s, loss=0.0122]

tensor(0.0122, device='cuda:0', grad_fn=<AddBackward0>)


39it [00:12,  3.09it/s, loss=0.0141]

tensor(0.0141, device='cuda:0', grad_fn=<AddBackward0>)


40it [00:12,  3.20it/s, loss=0.0141]

tensor(0.0141, device='cuda:0', grad_fn=<AddBackward0>)


41it [00:12,  3.11it/s, loss=0.0128]

tensor(0.0128, device='cuda:0', grad_fn=<AddBackward0>)


42it [00:12,  3.17it/s, loss=0.0127]

tensor(0.0127, device='cuda:0', grad_fn=<AddBackward0>)


43it [00:13,  3.18it/s, loss=0.0132]

tensor(0.0132, device='cuda:0', grad_fn=<AddBackward0>)


44it [00:13,  3.17it/s, loss=0.0121]

tensor(0.0121, device='cuda:0', grad_fn=<AddBackward0>)


45it [00:13,  3.14it/s, loss=0.013] 

tensor(0.0130, device='cuda:0', grad_fn=<AddBackward0>)


46it [00:14,  3.11it/s, loss=0.0136]

tensor(0.0136, device='cuda:0', grad_fn=<AddBackward0>)


47it [00:14,  3.17it/s, loss=0.0137]

tensor(0.0137, device='cuda:0', grad_fn=<AddBackward0>)


48it [00:14,  3.14it/s, loss=0.013] 

tensor(0.0130, device='cuda:0', grad_fn=<AddBackward0>)


49it [00:15,  3.08it/s, loss=0.0136]

tensor(0.0136, device='cuda:0', grad_fn=<AddBackward0>)


50it [00:15,  3.16it/s, loss=0.0143]

tensor(0.0143, device='cuda:0', grad_fn=<AddBackward0>)


51it [00:15,  3.16it/s, loss=0.0145]

tensor(0.0145, device='cuda:0', grad_fn=<AddBackward0>)


52it [00:16,  3.17it/s, loss=0.0119]

tensor(0.0119, device='cuda:0', grad_fn=<AddBackward0>)


53it [00:16,  3.12it/s, loss=0.0128]

tensor(0.0128, device='cuda:0', grad_fn=<AddBackward0>)


54it [00:16,  3.07it/s, loss=0.0123]

tensor(0.0123, device='cuda:0', grad_fn=<AddBackward0>)


55it [00:17,  3.03it/s, loss=0.0136]

tensor(0.0136, device='cuda:0', grad_fn=<AddBackward0>)


56it [00:17,  3.09it/s, loss=0.0129]

tensor(0.0129, device='cuda:0', grad_fn=<AddBackward0>)


57it [00:17,  3.09it/s, loss=0.0146]

tensor(0.0146, device='cuda:0', grad_fn=<AddBackward0>)


58it [00:18,  3.06it/s, loss=0.0145]

tensor(0.0145, device='cuda:0', grad_fn=<AddBackward0>)


59it [00:18,  3.11it/s, loss=0.0132]

tensor(0.0132, device='cuda:0', grad_fn=<AddBackward0>)


60it [00:18,  3.00it/s, loss=0.0142]

tensor(0.0142, device='cuda:0', grad_fn=<AddBackward0>)


61it [00:19,  3.01it/s, loss=0.0134]

tensor(0.0134, device='cuda:0', grad_fn=<AddBackward0>)


62it [00:19,  3.01it/s, loss=0.0127]

tensor(0.0127, device='cuda:0', grad_fn=<AddBackward0>)


63it [00:19,  3.10it/s, loss=0.0153]

tensor(0.0153, device='cuda:0', grad_fn=<AddBackward0>)


64it [00:20,  3.20it/s, loss=0.0123]

tensor(0.0123, device='cuda:0', grad_fn=<AddBackward0>)


65it [00:20,  3.15it/s, loss=0.0129]

tensor(0.0129, device='cuda:0', grad_fn=<AddBackward0>)


66it [00:20,  3.19it/s, loss=0.0119]

tensor(0.0119, device='cuda:0', grad_fn=<AddBackward0>)


67it [00:20,  3.09it/s, loss=0.0107]

tensor(0.0107, device='cuda:0', grad_fn=<AddBackward0>)


68it [00:21,  3.19it/s, loss=0.0127]

tensor(0.0127, device='cuda:0', grad_fn=<AddBackward0>)


69it [00:21,  3.10it/s, loss=0.0131]

tensor(0.0131, device='cuda:0', grad_fn=<AddBackward0>)


70it [00:21,  3.10it/s, loss=0.0131]

tensor(0.0131, device='cuda:0', grad_fn=<AddBackward0>)


71it [00:22,  3.02it/s, loss=0.0133]

tensor(0.0133, device='cuda:0', grad_fn=<AddBackward0>)


72it [00:22,  2.97it/s, loss=0.0144]

tensor(0.0144, device='cuda:0', grad_fn=<AddBackward0>)


73it [00:23,  2.85it/s, loss=0.0132]

tensor(0.0132, device='cuda:0', grad_fn=<AddBackward0>)


74it [00:23,  2.95it/s, loss=0.0118]

tensor(0.0118, device='cuda:0', grad_fn=<AddBackward0>)


75it [00:23,  2.90it/s, loss=0.0142]

tensor(0.0142, device='cuda:0', grad_fn=<AddBackward0>)


76it [00:24,  2.97it/s, loss=0.0141]

tensor(0.0141, device='cuda:0', grad_fn=<AddBackward0>)


77it [00:24,  3.04it/s, loss=0.012] 

tensor(0.0120, device='cuda:0', grad_fn=<AddBackward0>)


78it [00:24,  3.13it/s, loss=0.0129]

tensor(0.0129, device='cuda:0', grad_fn=<AddBackward0>)


79it [00:24,  3.07it/s, loss=0.0126]

tensor(0.0126, device='cuda:0', grad_fn=<AddBackward0>)


80it [00:25,  3.16it/s, loss=0.0125]

tensor(0.0125, device='cuda:0', grad_fn=<AddBackward0>)


81it [00:25,  3.12it/s, loss=0.0124]

tensor(0.0124, device='cuda:0', grad_fn=<AddBackward0>)


82it [00:25,  3.05it/s, loss=0.0126]

tensor(0.0126, device='cuda:0', grad_fn=<AddBackward0>)


83it [00:26,  3.09it/s, loss=0.0139]

tensor(0.0139, device='cuda:0', grad_fn=<AddBackward0>)


84it [00:26,  3.02it/s, loss=0.0126]

tensor(0.0126, device='cuda:0', grad_fn=<AddBackward0>)


85it [00:26,  3.04it/s, loss=0.0117]

tensor(0.0117, device='cuda:0', grad_fn=<AddBackward0>)


86it [00:27,  3.09it/s, loss=0.0139]

tensor(0.0139, device='cuda:0', grad_fn=<AddBackward0>)


87it [00:27,  3.12it/s, loss=0.0123]

tensor(0.0123, device='cuda:0', grad_fn=<AddBackward0>)


88it [00:27,  3.17it/s, loss=0.0126]

tensor(0.0126, device='cuda:0', grad_fn=<AddBackward0>)


89it [00:28,  3.17it/s, loss=0.0139]

tensor(0.0139, device='cuda:0', grad_fn=<AddBackward0>)


90it [00:28,  3.25it/s, loss=0.0134]

tensor(0.0134, device='cuda:0', grad_fn=<AddBackward0>)


91it [00:28,  3.18it/s, loss=0.012] 

tensor(0.0120, device='cuda:0', grad_fn=<AddBackward0>)


92it [00:29,  3.14it/s, loss=0.0133]

tensor(0.0133, device='cuda:0', grad_fn=<AddBackward0>)


93it [00:29,  3.16it/s, loss=0.0128]

tensor(0.0128, device='cuda:0', grad_fn=<AddBackward0>)


94it [00:29,  3.11it/s, loss=0.0137]

tensor(0.0137, device='cuda:0', grad_fn=<AddBackward0>)


95it [00:30,  3.08it/s, loss=0.0124]

tensor(0.0124, device='cuda:0', grad_fn=<AddBackward0>)


96it [00:30,  3.15it/s, loss=0.0124]

tensor(0.0124, device='cuda:0', grad_fn=<AddBackward0>)


97it [00:30,  3.10it/s, loss=0.0124]

tensor(0.0124, device='cuda:0', grad_fn=<AddBackward0>)


98it [00:31,  3.08it/s, loss=0.0132]

tensor(0.0132, device='cuda:0', grad_fn=<AddBackward0>)


99it [00:31,  3.11it/s, loss=0.0121]

tensor(0.0121, device='cuda:0', grad_fn=<AddBackward0>)


100it [00:31,  3.14it/s, loss=0.0128]

tensor(0.0128, device='cuda:0', grad_fn=<AddBackward0>)


101it [00:32,  3.03it/s, loss=0.0109]

tensor(0.0109, device='cuda:0', grad_fn=<AddBackward0>)


102it [00:32,  3.11it/s, loss=0.0119]

tensor(0.0119, device='cuda:0', grad_fn=<AddBackward0>)


103it [00:32,  3.10it/s, loss=0.0133]

tensor(0.0133, device='cuda:0', grad_fn=<AddBackward0>)


104it [00:32,  3.16it/s, loss=0.012] 

tensor(0.0120, device='cuda:0', grad_fn=<AddBackward0>)


105it [00:33,  3.18it/s, loss=0.0123]

tensor(0.0123, device='cuda:0', grad_fn=<AddBackward0>)


106it [00:33,  3.24it/s, loss=0.0123]

tensor(0.0123, device='cuda:0', grad_fn=<AddBackward0>)


107it [00:33,  3.16it/s, loss=0.0116]

tensor(0.0116, device='cuda:0', grad_fn=<AddBackward0>)


108it [00:34,  3.20it/s, loss=0.0122]

tensor(0.0122, device='cuda:0', grad_fn=<AddBackward0>)


109it [00:34,  3.21it/s, loss=0.0138]

tensor(0.0138, device='cuda:0', grad_fn=<AddBackward0>)


110it [00:34,  3.16it/s, loss=0.012] 

tensor(0.0120, device='cuda:0', grad_fn=<AddBackward0>)


111it [00:35,  3.16it/s, loss=0.0122]

tensor(0.0122, device='cuda:0', grad_fn=<AddBackward0>)


112it [00:35,  3.25it/s, loss=0.0116]

tensor(0.0116, device='cuda:0', grad_fn=<AddBackward0>)


113it [00:35,  3.20it/s, loss=0.0114]

tensor(0.0114, device='cuda:0', grad_fn=<AddBackward0>)


114it [00:36,  3.26it/s, loss=0.0125]

tensor(0.0125, device='cuda:0', grad_fn=<AddBackward0>)


115it [00:36,  3.25it/s, loss=0.0122]

tensor(0.0122, device='cuda:0', grad_fn=<AddBackward0>)


116it [00:36,  3.24it/s, loss=0.012] 

tensor(0.0120, device='cuda:0', grad_fn=<AddBackward0>)


117it [00:37,  3.21it/s, loss=0.012]

tensor(0.0120, device='cuda:0', grad_fn=<AddBackward0>)


118it [00:37,  3.25it/s, loss=0.0123]

tensor(0.0123, device='cuda:0', grad_fn=<AddBackward0>)


119it [00:37,  3.10it/s, loss=0.0123]

tensor(0.0123, device='cuda:0', grad_fn=<AddBackward0>)


120it [00:37,  3.21it/s, loss=0.0123]

tensor(0.0123, device='cuda:0', grad_fn=<AddBackward0>)


121it [00:38,  3.20it/s, loss=0.0117]

tensor(0.0117, device='cuda:0', grad_fn=<AddBackward0>)


122it [00:38,  3.19it/s, loss=0.0119]

tensor(0.0119, device='cuda:0', grad_fn=<AddBackward0>)


123it [00:38,  3.06it/s, loss=0.0125]

tensor(0.0125, device='cuda:0', grad_fn=<AddBackward0>)


124it [00:39,  3.04it/s, loss=0.0109]

tensor(0.0109, device='cuda:0', grad_fn=<AddBackward0>)


125it [00:39,  3.02it/s, loss=0.0129]

tensor(0.0129, device='cuda:0', grad_fn=<AddBackward0>)


126it [00:39,  2.96it/s, loss=0.0121]

tensor(0.0121, device='cuda:0', grad_fn=<AddBackward0>)


127it [00:40,  3.01it/s, loss=0.0142]

tensor(0.0142, device='cuda:0', grad_fn=<AddBackward0>)


128it [00:40,  3.08it/s, loss=0.0119]

tensor(0.0119, device='cuda:0', grad_fn=<AddBackward0>)


129it [00:40,  3.01it/s, loss=0.0128]

tensor(0.0128, device='cuda:0', grad_fn=<AddBackward0>)


130it [00:41,  3.07it/s, loss=0.0128]

tensor(0.0128, device='cuda:0', grad_fn=<AddBackward0>)


131it [00:41,  3.07it/s, loss=0.0129]

tensor(0.0129, device='cuda:0', grad_fn=<AddBackward0>)


132it [00:41,  3.14it/s, loss=0.0135]

tensor(0.0135, device='cuda:0', grad_fn=<AddBackward0>)


133it [00:42,  3.09it/s, loss=0.0112]

tensor(0.0112, device='cuda:0', grad_fn=<AddBackward0>)


134it [00:42,  3.12it/s, loss=0.0113]

tensor(0.0113, device='cuda:0', grad_fn=<AddBackward0>)


135it [00:42,  3.09it/s, loss=0.0121]

tensor(0.0121, device='cuda:0', grad_fn=<AddBackward0>)


136it [00:43,  3.12it/s, loss=0.0105]

tensor(0.0105, device='cuda:0', grad_fn=<AddBackward0>)


137it [00:43,  3.10it/s, loss=0.0124]

tensor(0.0124, device='cuda:0', grad_fn=<AddBackward0>)


138it [00:43,  3.05it/s, loss=0.0121]

tensor(0.0121, device='cuda:0', grad_fn=<AddBackward0>)


139it [00:44,  3.03it/s, loss=0.013] 

tensor(0.0130, device='cuda:0', grad_fn=<AddBackward0>)


140it [00:44,  3.08it/s, loss=0.011]

tensor(0.0110, device='cuda:0', grad_fn=<AddBackward0>)


141it [00:44,  3.11it/s, loss=0.0109]

tensor(0.0109, device='cuda:0', grad_fn=<AddBackward0>)


142it [00:45,  3.07it/s, loss=0.0127]

tensor(0.0127, device='cuda:0', grad_fn=<AddBackward0>)


143it [00:45,  3.08it/s, loss=0.0118]

tensor(0.0118, device='cuda:0', grad_fn=<AddBackward0>)


144it [00:45,  3.13it/s, loss=0.0122]

tensor(0.0122, device='cuda:0', grad_fn=<AddBackward0>)


145it [00:46,  3.11it/s, loss=0.0123]

tensor(0.0123, device='cuda:0', grad_fn=<AddBackward0>)


146it [00:46,  3.16it/s, loss=0.0118]

tensor(0.0118, device='cuda:0', grad_fn=<AddBackward0>)


147it [00:46,  3.14it/s, loss=0.0124]

tensor(0.0124, device='cuda:0', grad_fn=<AddBackward0>)


148it [00:47,  3.13it/s, loss=0.0113]

tensor(0.0113, device='cuda:0', grad_fn=<AddBackward0>)


149it [00:47,  3.15it/s, loss=0.0115]

tensor(0.0115, device='cuda:0', grad_fn=<AddBackward0>)


150it [00:47,  3.15it/s, loss=0.0117]

tensor(0.0117, device='cuda:0', grad_fn=<AddBackward0>)


151it [00:48,  3.07it/s, loss=0.0123]

tensor(0.0123, device='cuda:0', grad_fn=<AddBackward0>)


152it [00:48,  3.16it/s, loss=0.0119]

tensor(0.0119, device='cuda:0', grad_fn=<AddBackward0>)


153it [00:48,  3.13it/s, loss=0.0122]

tensor(0.0122, device='cuda:0', grad_fn=<AddBackward0>)


154it [00:48,  3.04it/s, loss=0.0117]

tensor(0.0117, device='cuda:0', grad_fn=<AddBackward0>)


155it [00:49,  3.13it/s, loss=0.0126]

tensor(0.0126, device='cuda:0', grad_fn=<AddBackward0>)


156it [00:49,  3.19it/s, loss=0.012] 

tensor(0.0120, device='cuda:0', grad_fn=<AddBackward0>)


157it [00:49,  3.16it/s, loss=0.0124]

tensor(0.0124, device='cuda:0', grad_fn=<AddBackward0>)


158it [00:50,  3.12it/s, loss=0.0121]

tensor(0.0121, device='cuda:0', grad_fn=<AddBackward0>)


159it [00:50,  3.12it/s, loss=0.0123]

tensor(0.0123, device='cuda:0', grad_fn=<AddBackward0>)


160it [00:50,  3.13it/s, loss=0.0123]

tensor(0.0123, device='cuda:0', grad_fn=<AddBackward0>)


161it [00:51,  3.12it/s, loss=0.0132]

tensor(0.0132, device='cuda:0', grad_fn=<AddBackward0>)


162it [00:51,  3.20it/s, loss=0.0133]

tensor(0.0133, device='cuda:0', grad_fn=<AddBackward0>)


163it [00:51,  3.13it/s, loss=0.0117]

tensor(0.0117, device='cuda:0', grad_fn=<AddBackward0>)


164it [00:52,  3.17it/s, loss=0.0113]

tensor(0.0113, device='cuda:0', grad_fn=<AddBackward0>)


165it [00:52,  3.21it/s, loss=0.0121]

tensor(0.0121, device='cuda:0', grad_fn=<AddBackward0>)


166it [00:52,  3.25it/s, loss=0.0109]

tensor(0.0109, device='cuda:0', grad_fn=<AddBackward0>)


167it [00:53,  3.20it/s, loss=0.012] 

tensor(0.0120, device='cuda:0', grad_fn=<AddBackward0>)


168it [00:53,  3.21it/s, loss=0.0117]

tensor(0.0117, device='cuda:0', grad_fn=<AddBackward0>)


169it [00:53,  3.16it/s, loss=0.0114]

tensor(0.0114, device='cuda:0', grad_fn=<AddBackward0>)


170it [00:54,  3.19it/s, loss=0.0117]

tensor(0.0117, device='cuda:0', grad_fn=<AddBackward0>)


171it [00:54,  3.22it/s, loss=0.0115]

tensor(0.0115, device='cuda:0', grad_fn=<AddBackward0>)


172it [00:54,  3.28it/s, loss=0.0115]

tensor(0.0115, device='cuda:0', grad_fn=<AddBackward0>)


173it [00:54,  3.16it/s, loss=0.0116]

tensor(0.0116, device='cuda:0', grad_fn=<AddBackward0>)


174it [00:55,  3.19it/s, loss=0.0116]

tensor(0.0116, device='cuda:0', grad_fn=<AddBackward0>)


175it [00:55,  3.09it/s, loss=0.0112]

tensor(0.0112, device='cuda:0', grad_fn=<AddBackward0>)


176it [00:55,  2.92it/s, loss=0.012] 

tensor(0.0120, device='cuda:0', grad_fn=<AddBackward0>)


177it [00:56,  3.01it/s, loss=0.0133]

tensor(0.0133, device='cuda:0', grad_fn=<AddBackward0>)


178it [00:56,  2.93it/s, loss=0.0109]

tensor(0.0109, device='cuda:0', grad_fn=<AddBackward0>)


179it [00:56,  2.98it/s, loss=0.0126]

tensor(0.0126, device='cuda:0', grad_fn=<AddBackward0>)


180it [00:57,  3.08it/s, loss=0.0128]

tensor(0.0128, device='cuda:0', grad_fn=<AddBackward0>)


181it [00:57,  3.06it/s, loss=0.0123]

tensor(0.0123, device='cuda:0', grad_fn=<AddBackward0>)


182it [00:57,  3.00it/s, loss=0.0116]

tensor(0.0116, device='cuda:0', grad_fn=<AddBackward0>)


183it [00:58,  3.07it/s, loss=0.0113]

tensor(0.0113, device='cuda:0', grad_fn=<AddBackward0>)


184it [00:58,  3.09it/s, loss=0.0123]

tensor(0.0123, device='cuda:0', grad_fn=<AddBackward0>)


185it [00:58,  3.08it/s, loss=0.0128]

tensor(0.0128, device='cuda:0', grad_fn=<AddBackward0>)


186it [00:59,  3.07it/s, loss=0.0123]

tensor(0.0123, device='cuda:0', grad_fn=<AddBackward0>)


187it [00:59,  3.08it/s, loss=0.0123]

tensor(0.0123, device='cuda:0', grad_fn=<AddBackward0>)


188it [00:59,  3.09it/s, loss=0.012] 

tensor(0.0120, device='cuda:0', grad_fn=<AddBackward0>)


189it [01:00,  3.00it/s, loss=0.0128]

tensor(0.0128, device='cuda:0', grad_fn=<AddBackward0>)


190it [01:00,  3.06it/s, loss=0.0112]

tensor(0.0112, device='cuda:0', grad_fn=<AddBackward0>)


191it [01:00,  2.99it/s, loss=0.0111]

tensor(0.0111, device='cuda:0', grad_fn=<AddBackward0>)


192it [01:01,  3.10it/s, loss=0.0117]

tensor(0.0117, device='cuda:0', grad_fn=<AddBackward0>)


193it [01:01,  3.06it/s, loss=0.0115]

tensor(0.0115, device='cuda:0', grad_fn=<AddBackward0>)


194it [01:01,  3.04it/s, loss=0.0122]

tensor(0.0122, device='cuda:0', grad_fn=<AddBackward0>)


195it [01:02,  3.08it/s, loss=0.0106]

tensor(0.0106, device='cuda:0', grad_fn=<AddBackward0>)


196it [01:02,  3.09it/s, loss=0.0118]

tensor(0.0118, device='cuda:0', grad_fn=<AddBackward0>)


197it [01:02,  3.00it/s, loss=0.0119]

tensor(0.0119, device='cuda:0', grad_fn=<AddBackward0>)


198it [01:03,  3.09it/s, loss=0.0113]

tensor(0.0113, device='cuda:0', grad_fn=<AddBackward0>)


199it [01:03,  3.03it/s, loss=0.0116]

tensor(0.0116, device='cuda:0', grad_fn=<AddBackward0>)


200it [01:03,  3.00it/s, loss=0.0116]

tensor(0.0116, device='cuda:0', grad_fn=<AddBackward0>)


201it [01:04,  2.97it/s, loss=0.0116]

tensor(0.0116, device='cuda:0', grad_fn=<AddBackward0>)


202it [01:04,  3.07it/s, loss=0.0138]

tensor(0.0138, device='cuda:0', grad_fn=<AddBackward0>)


203it [01:04,  3.09it/s, loss=0.012] 

tensor(0.0120, device='cuda:0', grad_fn=<AddBackward0>)


204it [01:05,  3.12it/s, loss=0.0117]

tensor(0.0117, device='cuda:0', grad_fn=<AddBackward0>)


205it [01:05,  3.10it/s, loss=0.0112]

tensor(0.0112, device='cuda:0', grad_fn=<AddBackward0>)


206it [01:05,  3.06it/s, loss=0.0128]

tensor(0.0128, device='cuda:0', grad_fn=<AddBackward0>)


207it [01:06,  3.11it/s, loss=0.0112]

tensor(0.0112, device='cuda:0', grad_fn=<AddBackward0>)


208it [01:06,  3.17it/s, loss=0.0102]

tensor(0.0102, device='cuda:0', grad_fn=<AddBackward0>)


209it [01:06,  3.17it/s, loss=0.0109]

tensor(0.0109, device='cuda:0', grad_fn=<AddBackward0>)


210it [01:07,  3.09it/s, loss=0.0119]

tensor(0.0119, device='cuda:0', grad_fn=<AddBackward0>)


211it [01:07,  3.07it/s, loss=0.0109]

tensor(0.0109, device='cuda:0', grad_fn=<AddBackward0>)


212it [01:07,  3.06it/s, loss=0.0111]

tensor(0.0111, device='cuda:0', grad_fn=<AddBackward0>)


213it [01:08,  2.99it/s, loss=0.0111]

tensor(0.0111, device='cuda:0', grad_fn=<AddBackward0>)


214it [01:08,  3.12it/s, loss=0.0104]

tensor(0.0104, device='cuda:0', grad_fn=<AddBackward0>)


215it [01:08,  3.11it/s, loss=0.0114]

tensor(0.0114, device='cuda:0', grad_fn=<AddBackward0>)


216it [01:08,  3.15it/s, loss=0.0115]

tensor(0.0115, device='cuda:0', grad_fn=<AddBackward0>)


217it [01:09,  3.20it/s, loss=0.0117]

tensor(0.0117, device='cuda:0', grad_fn=<AddBackward0>)


218it [01:09,  3.29it/s, loss=0.0103]

tensor(0.0103, device='cuda:0', grad_fn=<AddBackward0>)


219it [01:09,  3.17it/s, loss=0.0113]

tensor(0.0113, device='cuda:0', grad_fn=<AddBackward0>)


220it [01:10,  3.21it/s, loss=0.0119]

tensor(0.0119, device='cuda:0', grad_fn=<AddBackward0>)


221it [01:10,  3.13it/s, loss=0.0113]

tensor(0.0113, device='cuda:0', grad_fn=<AddBackward0>)


222it [01:10,  3.17it/s, loss=0.0111]

tensor(0.0111, device='cuda:0', grad_fn=<AddBackward0>)


223it [01:11,  3.12it/s, loss=0.0112]

tensor(0.0112, device='cuda:0', grad_fn=<AddBackward0>)


224it [01:11,  3.13it/s, loss=0.0118]

tensor(0.0118, device='cuda:0', grad_fn=<AddBackward0>)


225it [01:11,  3.15it/s, loss=0.0118]

tensor(0.0118, device='cuda:0', grad_fn=<AddBackward0>)


226it [01:12,  3.22it/s, loss=0.0111]

tensor(0.0111, device='cuda:0', grad_fn=<AddBackward0>)


227it [01:12,  3.15it/s, loss=0.0115]

tensor(0.0115, device='cuda:0', grad_fn=<AddBackward0>)


228it [01:12,  3.10it/s, loss=0.0114]

tensor(0.0114, device='cuda:0', grad_fn=<AddBackward0>)


229it [01:13,  3.04it/s, loss=0.0115]

tensor(0.0115, device='cuda:0', grad_fn=<AddBackward0>)


230it [01:13,  3.12it/s, loss=0.0116]

tensor(0.0116, device='cuda:0', grad_fn=<AddBackward0>)


231it [01:13,  3.09it/s, loss=0.0128]

tensor(0.0128, device='cuda:0', grad_fn=<AddBackward0>)


232it [01:14,  3.18it/s, loss=0.01]  

tensor(0.0100, device='cuda:0', grad_fn=<AddBackward0>)


233it [01:14,  3.14it/s, loss=0.0112]

tensor(0.0112, device='cuda:0', grad_fn=<AddBackward0>)


234it [01:14,  3.19it/s, loss=0.0118]

tensor(0.0118, device='cuda:0', grad_fn=<AddBackward0>)


235it [01:15,  3.12it/s, loss=0.011] 

tensor(0.0110, device='cuda:0', grad_fn=<AddBackward0>)


236it [01:15,  3.17it/s, loss=0.0112]

tensor(0.0112, device='cuda:0', grad_fn=<AddBackward0>)


237it [01:15,  3.19it/s, loss=0.0115]

tensor(0.0115, device='cuda:0', grad_fn=<AddBackward0>)


238it [01:15,  3.20it/s, loss=0.0116]

tensor(0.0116, device='cuda:0', grad_fn=<AddBackward0>)


239it [01:16,  3.19it/s, loss=0.0119]

tensor(0.0119, device='cuda:0', grad_fn=<AddBackward0>)


240it [01:16,  3.17it/s, loss=0.011] 

tensor(0.0110, device='cuda:0', grad_fn=<AddBackward0>)


241it [01:16,  3.15it/s, loss=0.0115]

tensor(0.0115, device='cuda:0', grad_fn=<AddBackward0>)


242it [01:17,  3.15it/s, loss=0.0111]

tensor(0.0111, device='cuda:0', grad_fn=<AddBackward0>)


243it [01:17,  3.05it/s, loss=0.011] 

tensor(0.0110, device='cuda:0', grad_fn=<AddBackward0>)


244it [01:17,  3.04it/s, loss=0.0118]

tensor(0.0118, device='cuda:0', grad_fn=<AddBackward0>)


245it [01:18,  3.01it/s, loss=0.013] 

tensor(0.0130, device='cuda:0', grad_fn=<AddBackward0>)


246it [01:18,  3.07it/s, loss=0.0105]

tensor(0.0105, device='cuda:0', grad_fn=<AddBackward0>)


247it [01:18,  3.09it/s, loss=0.0112]

tensor(0.0112, device='cuda:0', grad_fn=<AddBackward0>)


248it [01:19,  3.09it/s, loss=0.0128]

tensor(0.0128, device='cuda:0', grad_fn=<AddBackward0>)


249it [01:19,  3.03it/s, loss=0.0116]

tensor(0.0116, device='cuda:0', grad_fn=<AddBackward0>)


250it [01:19,  3.00it/s, loss=0.0113]

tensor(0.0113, device='cuda:0', grad_fn=<AddBackward0>)


251it [01:20,  3.04it/s, loss=0.0134]

tensor(0.0134, device='cuda:0', grad_fn=<AddBackward0>)


252it [01:20,  3.11it/s, loss=0.0115]

tensor(0.0115, device='cuda:0', grad_fn=<AddBackward0>)


253it [01:20,  3.03it/s, loss=0.0126]

tensor(0.0126, device='cuda:0', grad_fn=<AddBackward0>)


254it [01:21,  3.12it/s, loss=0.0113]

tensor(0.0113, device='cuda:0', grad_fn=<AddBackward0>)


255it [01:21,  3.08it/s, loss=0.0115]

tensor(0.0115, device='cuda:0', grad_fn=<AddBackward0>)


256it [01:21,  3.05it/s, loss=0.0112]

tensor(0.0112, device='cuda:0', grad_fn=<AddBackward0>)


257it [01:22,  3.02it/s, loss=0.0103]

tensor(0.0103, device='cuda:0', grad_fn=<AddBackward0>)


258it [01:22,  3.14it/s, loss=0.0119]

tensor(0.0119, device='cuda:0', grad_fn=<AddBackward0>)


259it [01:22,  3.08it/s, loss=0.0112]

tensor(0.0112, device='cuda:0', grad_fn=<AddBackward0>)


260it [01:23,  3.06it/s, loss=0.0121]

tensor(0.0121, device='cuda:0', grad_fn=<AddBackward0>)


261it [01:23,  3.05it/s, loss=0.0115]

tensor(0.0115, device='cuda:0', grad_fn=<AddBackward0>)


262it [01:23,  3.11it/s, loss=0.0118]

tensor(0.0118, device='cuda:0', grad_fn=<AddBackward0>)


263it [01:24,  3.06it/s, loss=0.0114]

tensor(0.0114, device='cuda:0', grad_fn=<AddBackward0>)


264it [01:24,  3.13it/s, loss=0.0111]

tensor(0.0111, device='cuda:0', grad_fn=<AddBackward0>)


265it [01:24,  3.07it/s, loss=0.012] 

tensor(0.0120, device='cuda:0', grad_fn=<AddBackward0>)


266it [01:25,  3.04it/s, loss=0.0115]

tensor(0.0115, device='cuda:0', grad_fn=<AddBackward0>)


267it [01:25,  3.05it/s, loss=0.0118]

tensor(0.0118, device='cuda:0', grad_fn=<AddBackward0>)


268it [01:25,  3.05it/s, loss=0.0115]

tensor(0.0115, device='cuda:0', grad_fn=<AddBackward0>)


269it [01:26,  2.95it/s, loss=0.0119]

tensor(0.0119, device='cuda:0', grad_fn=<AddBackward0>)


270it [01:26,  3.09it/s, loss=0.0117]

tensor(0.0117, device='cuda:0', grad_fn=<AddBackward0>)


271it [01:26,  3.08it/s, loss=0.0116]

tensor(0.0116, device='cuda:0', grad_fn=<AddBackward0>)


272it [01:27,  3.00it/s, loss=0.0121]

tensor(0.0121, device='cuda:0', grad_fn=<AddBackward0>)


273it [01:27,  3.09it/s, loss=0.0124]

tensor(0.0124, device='cuda:0', grad_fn=<AddBackward0>)


274it [01:27,  3.14it/s, loss=0.013] 

tensor(0.0130, device='cuda:0', grad_fn=<AddBackward0>)


275it [01:27,  3.13it/s, loss=0.0111]

tensor(0.0111, device='cuda:0', grad_fn=<AddBackward0>)


276it [01:28,  3.20it/s, loss=0.0122]

tensor(0.0122, device='cuda:0', grad_fn=<AddBackward0>)


277it [01:28,  3.12it/s, loss=0.0117]

tensor(0.0117, device='cuda:0', grad_fn=<AddBackward0>)


278it [01:28,  3.14it/s, loss=0.0117]

tensor(0.0117, device='cuda:0', grad_fn=<AddBackward0>)


279it [01:29,  3.17it/s, loss=0.0109]

tensor(0.0109, device='cuda:0', grad_fn=<AddBackward0>)


280it [01:29,  3.17it/s, loss=0.0126]

tensor(0.0126, device='cuda:0', grad_fn=<AddBackward0>)


281it [01:29,  3.17it/s, loss=0.0118]

tensor(0.0118, device='cuda:0', grad_fn=<AddBackward0>)


282it [01:30,  3.20it/s, loss=0.0116]

tensor(0.0116, device='cuda:0', grad_fn=<AddBackward0>)


283it [01:30,  3.21it/s, loss=0.0115]

tensor(0.0115, device='cuda:0', grad_fn=<AddBackward0>)


284it [01:30,  3.16it/s, loss=0.011] 

tensor(0.0110, device='cuda:0', grad_fn=<AddBackward0>)


285it [01:31,  3.08it/s, loss=0.0115]

tensor(0.0115, device='cuda:0', grad_fn=<AddBackward0>)


286it [01:31,  3.09it/s, loss=0.0121]

tensor(0.0121, device='cuda:0', grad_fn=<AddBackward0>)


287it [01:31,  3.04it/s, loss=0.0121]

tensor(0.0121, device='cuda:0', grad_fn=<AddBackward0>)


288it [01:32,  3.06it/s, loss=0.0127]

tensor(0.0127, device='cuda:0', grad_fn=<AddBackward0>)


289it [01:32,  3.12it/s, loss=0.012] 

tensor(0.0120, device='cuda:0', grad_fn=<AddBackward0>)


290it [01:32,  3.15it/s, loss=0.0109]

tensor(0.0109, device='cuda:0', grad_fn=<AddBackward0>)


291it [01:33,  3.09it/s, loss=0.0127]

tensor(0.0127, device='cuda:0', grad_fn=<AddBackward0>)


292it [01:33,  3.13it/s, loss=0.0105]

tensor(0.0105, device='cuda:0', grad_fn=<AddBackward0>)


293it [01:33,  3.09it/s, loss=0.012] 

tensor(0.0120, device='cuda:0', grad_fn=<AddBackward0>)


294it [01:34,  3.07it/s, loss=0.0119]

tensor(0.0119, device='cuda:0', grad_fn=<AddBackward0>)


295it [01:34,  3.12it/s, loss=0.0111]

tensor(0.0111, device='cuda:0', grad_fn=<AddBackward0>)


296it [01:34,  3.16it/s, loss=0.0105]

tensor(0.0105, device='cuda:0', grad_fn=<AddBackward0>)


297it [01:35,  3.11it/s, loss=0.0119]

tensor(0.0119, device='cuda:0', grad_fn=<AddBackward0>)


298it [01:35,  3.14it/s, loss=0.0116]

tensor(0.0116, device='cuda:0', grad_fn=<AddBackward0>)


299it [01:35,  3.15it/s, loss=0.0113]

tensor(0.0113, device='cuda:0', grad_fn=<AddBackward0>)


300it [01:35,  3.11it/s, loss=0.0102]

tensor(0.0102, device='cuda:0', grad_fn=<AddBackward0>)


301it [01:36,  3.16it/s, loss=0.011] 

tensor(0.0110, device='cuda:0', grad_fn=<AddBackward0>)


302it [01:36,  3.16it/s, loss=0.0105]

tensor(0.0105, device='cuda:0', grad_fn=<AddBackward0>)


303it [01:36,  2.97it/s, loss=0.0117]

tensor(0.0117, device='cuda:0', grad_fn=<AddBackward0>)


304it [01:37,  3.07it/s, loss=0.0123]

tensor(0.0123, device='cuda:0', grad_fn=<AddBackward0>)


305it [01:37,  3.04it/s, loss=0.0112]

tensor(0.0112, device='cuda:0', grad_fn=<AddBackward0>)


306it [01:37,  3.08it/s, loss=0.0122]

tensor(0.0122, device='cuda:0', grad_fn=<AddBackward0>)


307it [01:38,  3.08it/s, loss=0.0118]

tensor(0.0118, device='cuda:0', grad_fn=<AddBackward0>)


308it [01:38,  3.09it/s, loss=0.0118]

tensor(0.0118, device='cuda:0', grad_fn=<AddBackward0>)


309it [01:38,  3.00it/s, loss=0.0114]

tensor(0.0114, device='cuda:0', grad_fn=<AddBackward0>)


310it [01:39,  3.10it/s, loss=0.0115]

tensor(0.0115, device='cuda:0', grad_fn=<AddBackward0>)


311it [01:39,  3.09it/s, loss=0.0114]

tensor(0.0114, device='cuda:0', grad_fn=<AddBackward0>)


313it [01:40,  3.56it/s, loss=0.0121]

tensor(0.0108, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.0121, device='cuda:0', grad_fn=<AddBackward0>)


313it [01:40,  3.13it/s, loss=0.0121]

Epoch 4 of 400



1it [00:00,  2.43it/s, loss=0.0106]

tensor(0.0106, device='cuda:0', grad_fn=<AddBackward0>)


2it [00:00,  2.76it/s, loss=0.0128]

tensor(0.0128, device='cuda:0', grad_fn=<AddBackward0>)


3it [00:01,  3.02it/s, loss=0.0108]

tensor(0.0108, device='cuda:0', grad_fn=<AddBackward0>)


4it [00:01,  3.26it/s, loss=0.0111]

tensor(0.0111, device='cuda:0', grad_fn=<AddBackward0>)


5it [00:01,  3.28it/s, loss=0.0126]

tensor(0.0126, device='cuda:0', grad_fn=<AddBackward0>)


6it [00:01,  3.38it/s, loss=0.0119]

tensor(0.0119, device='cuda:0', grad_fn=<AddBackward0>)


7it [00:02,  3.39it/s, loss=0.0108]

tensor(0.0108, device='cuda:0', grad_fn=<AddBackward0>)


8it [00:02,  3.42it/s, loss=0.011] 

tensor(0.0110, device='cuda:0', grad_fn=<AddBackward0>)


9it [00:02,  3.41it/s, loss=0.0111]

tensor(0.0111, device='cuda:0', grad_fn=<AddBackward0>)


10it [00:03,  3.47it/s, loss=0.0112]

tensor(0.0112, device='cuda:0', grad_fn=<AddBackward0>)


11it [00:03,  3.45it/s, loss=0.0118]

tensor(0.0118, device='cuda:0', grad_fn=<AddBackward0>)


12it [00:03,  3.40it/s, loss=0.0113]

tensor(0.0113, device='cuda:0', grad_fn=<AddBackward0>)


13it [00:03,  3.41it/s, loss=0.0109]

tensor(0.0109, device='cuda:0', grad_fn=<AddBackward0>)


14it [00:04,  3.50it/s, loss=0.0117]

tensor(0.0117, device='cuda:0', grad_fn=<AddBackward0>)


15it [00:04,  3.43it/s, loss=0.0114]

tensor(0.0114, device='cuda:0', grad_fn=<AddBackward0>)


16it [00:04,  3.36it/s, loss=0.0118]

tensor(0.0118, device='cuda:0', grad_fn=<AddBackward0>)


17it [00:05,  3.32it/s, loss=0.0114]

tensor(0.0114, device='cuda:0', grad_fn=<AddBackward0>)


18it [00:05,  3.38it/s, loss=0.0111]

tensor(0.0111, device='cuda:0', grad_fn=<AddBackward0>)


19it [00:05,  3.33it/s, loss=0.011] 

tensor(0.0110, device='cuda:0', grad_fn=<AddBackward0>)


20it [00:05,  3.38it/s, loss=0.0124]

tensor(0.0124, device='cuda:0', grad_fn=<AddBackward0>)


21it [00:06,  3.28it/s, loss=0.0114]

tensor(0.0114, device='cuda:0', grad_fn=<AddBackward0>)


22it [00:06,  3.25it/s, loss=0.0106]

tensor(0.0106, device='cuda:0', grad_fn=<AddBackward0>)


23it [00:06,  3.26it/s, loss=0.0102]

tensor(0.0102, device='cuda:0', grad_fn=<AddBackward0>)


24it [00:07,  3.33it/s, loss=0.0106]

tensor(0.0106, device='cuda:0', grad_fn=<AddBackward0>)


25it [00:07,  3.30it/s, loss=0.0116]

tensor(0.0116, device='cuda:0', grad_fn=<AddBackward0>)


26it [00:07,  3.28it/s, loss=0.0114]

tensor(0.0114, device='cuda:0', grad_fn=<AddBackward0>)


27it [00:08,  3.30it/s, loss=0.0109]

tensor(0.0109, device='cuda:0', grad_fn=<AddBackward0>)


28it [00:08,  3.35it/s, loss=0.0117]

tensor(0.0117, device='cuda:0', grad_fn=<AddBackward0>)


29it [00:08,  3.19it/s, loss=0.0117]

tensor(0.0117, device='cuda:0', grad_fn=<AddBackward0>)


30it [00:09,  3.20it/s, loss=0.0114]

tensor(0.0114, device='cuda:0', grad_fn=<AddBackward0>)


31it [00:09,  3.18it/s, loss=0.0105]

tensor(0.0105, device='cuda:0', grad_fn=<AddBackward0>)


32it [00:09,  3.18it/s, loss=0.0115]

tensor(0.0115, device='cuda:0', grad_fn=<AddBackward0>)


33it [00:10,  3.21it/s, loss=0.012] 

tensor(0.0120, device='cuda:0', grad_fn=<AddBackward0>)


34it [00:10,  3.30it/s, loss=0.0107]

tensor(0.0107, device='cuda:0', grad_fn=<AddBackward0>)


35it [00:10,  3.21it/s, loss=0.00995]

tensor(0.0100, device='cuda:0', grad_fn=<AddBackward0>)


36it [00:10,  3.19it/s, loss=0.0108] 

tensor(0.0108, device='cuda:0', grad_fn=<AddBackward0>)


37it [00:11,  3.15it/s, loss=0.0108]

tensor(0.0108, device='cuda:0', grad_fn=<AddBackward0>)


38it [00:11,  3.06it/s, loss=0.0115]

tensor(0.0115, device='cuda:0', grad_fn=<AddBackward0>)


39it [00:11,  3.08it/s, loss=0.0111]

tensor(0.0111, device='cuda:0', grad_fn=<AddBackward0>)


40it [00:12,  3.14it/s, loss=0.01]  

tensor(0.0100, device='cuda:0', grad_fn=<AddBackward0>)


41it [00:12,  3.02it/s, loss=0.0108]

tensor(0.0108, device='cuda:0', grad_fn=<AddBackward0>)


42it [00:12,  3.06it/s, loss=0.0105]

tensor(0.0105, device='cuda:0', grad_fn=<AddBackward0>)


43it [00:13,  3.11it/s, loss=0.0118]

tensor(0.0118, device='cuda:0', grad_fn=<AddBackward0>)


44it [00:13,  3.16it/s, loss=0.0114]

tensor(0.0114, device='cuda:0', grad_fn=<AddBackward0>)


45it [00:13,  3.07it/s, loss=0.0107]

tensor(0.0107, device='cuda:0', grad_fn=<AddBackward0>)


46it [00:14,  3.17it/s, loss=0.0107]

tensor(0.0107, device='cuda:0', grad_fn=<AddBackward0>)


47it [00:14,  3.19it/s, loss=0.0114]

tensor(0.0114, device='cuda:0', grad_fn=<AddBackward0>)


48it [00:14,  3.17it/s, loss=0.011] 

tensor(0.0110, device='cuda:0', grad_fn=<AddBackward0>)


49it [00:15,  3.19it/s, loss=0.0121]

tensor(0.0121, device='cuda:0', grad_fn=<AddBackward0>)


50it [00:15,  3.15it/s, loss=0.0124]

tensor(0.0124, device='cuda:0', grad_fn=<AddBackward0>)


51it [00:15,  3.02it/s, loss=0.011] 

tensor(0.0110, device='cuda:0', grad_fn=<AddBackward0>)


52it [00:16,  3.10it/s, loss=0.011]

tensor(0.0110, device='cuda:0', grad_fn=<AddBackward0>)


53it [00:16,  3.09it/s, loss=0.011]

tensor(0.0110, device='cuda:0', grad_fn=<AddBackward0>)


54it [00:16,  3.12it/s, loss=0.0117]

tensor(0.0117, device='cuda:0', grad_fn=<AddBackward0>)


55it [00:17,  3.20it/s, loss=0.011] 

tensor(0.0110, device='cuda:0', grad_fn=<AddBackward0>)


56it [00:17,  3.16it/s, loss=0.0114]

tensor(0.0114, device='cuda:0', grad_fn=<AddBackward0>)


57it [00:17,  3.12it/s, loss=0.00984]

tensor(0.0098, device='cuda:0', grad_fn=<AddBackward0>)


58it [00:17,  3.21it/s, loss=0.0107] 

tensor(0.0107, device='cuda:0', grad_fn=<AddBackward0>)


59it [00:18,  3.09it/s, loss=0.00981]

tensor(0.0098, device='cuda:0', grad_fn=<AddBackward0>)


60it [00:18,  3.10it/s, loss=0.0115] 

tensor(0.0115, device='cuda:0', grad_fn=<AddBackward0>)


61it [00:18,  3.15it/s, loss=0.0111]

tensor(0.0111, device='cuda:0', grad_fn=<AddBackward0>)


62it [00:19,  3.28it/s, loss=0.0111]

tensor(0.0111, device='cuda:0', grad_fn=<AddBackward0>)


63it [00:19,  3.16it/s, loss=0.0112]

tensor(0.0112, device='cuda:0', grad_fn=<AddBackward0>)


64it [00:19,  3.15it/s, loss=0.0109]

tensor(0.0109, device='cuda:0', grad_fn=<AddBackward0>)


65it [00:20,  3.12it/s, loss=0.0108]

tensor(0.0108, device='cuda:0', grad_fn=<AddBackward0>)


66it [00:20,  3.13it/s, loss=0.0117]

tensor(0.0117, device='cuda:0', grad_fn=<AddBackward0>)


67it [00:20,  3.13it/s, loss=0.0112]

tensor(0.0112, device='cuda:0', grad_fn=<AddBackward0>)


68it [00:21,  3.19it/s, loss=0.0104]

tensor(0.0104, device='cuda:0', grad_fn=<AddBackward0>)


69it [00:21,  3.18it/s, loss=0.0108]

tensor(0.0108, device='cuda:0', grad_fn=<AddBackward0>)


70it [00:21,  3.21it/s, loss=0.0115]

tensor(0.0115, device='cuda:0', grad_fn=<AddBackward0>)


71it [00:22,  3.20it/s, loss=0.0112]

tensor(0.0112, device='cuda:0', grad_fn=<AddBackward0>)


72it [00:22,  3.16it/s, loss=0.0111]

tensor(0.0111, device='cuda:0', grad_fn=<AddBackward0>)


73it [00:22,  3.10it/s, loss=0.0108]

tensor(0.0108, device='cuda:0', grad_fn=<AddBackward0>)


74it [00:23,  3.16it/s, loss=0.0117]

tensor(0.0117, device='cuda:0', grad_fn=<AddBackward0>)


75it [00:23,  3.14it/s, loss=0.0107]

tensor(0.0107, device='cuda:0', grad_fn=<AddBackward0>)


76it [00:23,  3.18it/s, loss=0.0105]

tensor(0.0105, device='cuda:0', grad_fn=<AddBackward0>)


77it [00:24,  3.16it/s, loss=0.0116]

tensor(0.0116, device='cuda:0', grad_fn=<AddBackward0>)


78it [00:24,  3.20it/s, loss=0.011] 

tensor(0.0110, device='cuda:0', grad_fn=<AddBackward0>)


79it [00:24,  3.17it/s, loss=0.0112]

tensor(0.0112, device='cuda:0', grad_fn=<AddBackward0>)


80it [00:24,  3.16it/s, loss=0.00974]

tensor(0.0097, device='cuda:0', grad_fn=<AddBackward0>)


81it [00:25,  3.10it/s, loss=0.011]  

tensor(0.0110, device='cuda:0', grad_fn=<AddBackward0>)


82it [00:25,  3.20it/s, loss=0.0115]

tensor(0.0115, device='cuda:0', grad_fn=<AddBackward0>)


83it [00:25,  3.17it/s, loss=0.00995]

tensor(0.0100, device='cuda:0', grad_fn=<AddBackward0>)


84it [00:26,  3.17it/s, loss=0.0122] 

tensor(0.0122, device='cuda:0', grad_fn=<AddBackward0>)


85it [00:26,  3.11it/s, loss=0.0108]

tensor(0.0108, device='cuda:0', grad_fn=<AddBackward0>)


86it [00:26,  3.11it/s, loss=0.0106]

tensor(0.0106, device='cuda:0', grad_fn=<AddBackward0>)


87it [00:27,  3.03it/s, loss=0.0113]

tensor(0.0113, device='cuda:0', grad_fn=<AddBackward0>)


88it [00:27,  3.13it/s, loss=0.0108]

tensor(0.0108, device='cuda:0', grad_fn=<AddBackward0>)


89it [00:27,  3.05it/s, loss=0.0108]

tensor(0.0108, device='cuda:0', grad_fn=<AddBackward0>)


90it [00:28,  3.14it/s, loss=0.0111]

tensor(0.0111, device='cuda:0', grad_fn=<AddBackward0>)


91it [00:28,  3.12it/s, loss=0.0117]

tensor(0.0117, device='cuda:0', grad_fn=<AddBackward0>)


92it [00:28,  3.08it/s, loss=0.0114]

tensor(0.0114, device='cuda:0', grad_fn=<AddBackward0>)


93it [00:29,  3.10it/s, loss=0.0108]

tensor(0.0108, device='cuda:0', grad_fn=<AddBackward0>)


94it [00:29,  3.11it/s, loss=0.0118]

tensor(0.0118, device='cuda:0', grad_fn=<AddBackward0>)


95it [00:29,  3.07it/s, loss=0.0107]

tensor(0.0107, device='cuda:0', grad_fn=<AddBackward0>)


96it [00:30,  3.09it/s, loss=0.011] 

tensor(0.0110, device='cuda:0', grad_fn=<AddBackward0>)


97it [00:30,  3.12it/s, loss=0.0104]

tensor(0.0104, device='cuda:0', grad_fn=<AddBackward0>)


98it [00:30,  3.11it/s, loss=0.0111]

tensor(0.0111, device='cuda:0', grad_fn=<AddBackward0>)


99it [00:31,  2.99it/s, loss=0.011] 

tensor(0.0110, device='cuda:0', grad_fn=<AddBackward0>)


100it [00:31,  2.99it/s, loss=0.0118]

tensor(0.0118, device='cuda:0', grad_fn=<AddBackward0>)


101it [00:31,  2.94it/s, loss=0.0112]

tensor(0.0112, device='cuda:0', grad_fn=<AddBackward0>)


102it [00:32,  3.05it/s, loss=0.0111]

tensor(0.0111, device='cuda:0', grad_fn=<AddBackward0>)


103it [00:32,  3.02it/s, loss=0.0108]

tensor(0.0108, device='cuda:0', grad_fn=<AddBackward0>)


104it [00:32,  3.08it/s, loss=0.0106]

tensor(0.0106, device='cuda:0', grad_fn=<AddBackward0>)


105it [00:33,  3.08it/s, loss=0.0108]

tensor(0.0108, device='cuda:0', grad_fn=<AddBackward0>)


106it [00:33,  3.05it/s, loss=0.0121]

tensor(0.0121, device='cuda:0', grad_fn=<AddBackward0>)


107it [00:33,  3.00it/s, loss=0.0111]

tensor(0.0111, device='cuda:0', grad_fn=<AddBackward0>)


108it [00:34,  3.10it/s, loss=0.0106]

tensor(0.0106, device='cuda:0', grad_fn=<AddBackward0>)


109it [00:34,  3.10it/s, loss=0.0108]

tensor(0.0108, device='cuda:0', grad_fn=<AddBackward0>)


110it [00:34,  3.09it/s, loss=0.0107]

tensor(0.0107, device='cuda:0', grad_fn=<AddBackward0>)


111it [00:35,  3.08it/s, loss=0.0137]

tensor(0.0137, device='cuda:0', grad_fn=<AddBackward0>)


112it [00:35,  3.18it/s, loss=0.0109]

tensor(0.0109, device='cuda:0', grad_fn=<AddBackward0>)


113it [00:35,  3.17it/s, loss=0.0118]

tensor(0.0118, device='cuda:0', grad_fn=<AddBackward0>)


114it [00:35,  3.15it/s, loss=0.0116]

tensor(0.0116, device='cuda:0', grad_fn=<AddBackward0>)


115it [00:36,  3.17it/s, loss=0.011] 

tensor(0.0110, device='cuda:0', grad_fn=<AddBackward0>)


116it [00:36,  3.23it/s, loss=0.0115]

tensor(0.0115, device='cuda:0', grad_fn=<AddBackward0>)


117it [00:36,  3.20it/s, loss=0.0101]

tensor(0.0101, device='cuda:0', grad_fn=<AddBackward0>)


118it [00:37,  3.28it/s, loss=0.0105]

tensor(0.0105, device='cuda:0', grad_fn=<AddBackward0>)


119it [00:37,  3.20it/s, loss=0.0108]

tensor(0.0108, device='cuda:0', grad_fn=<AddBackward0>)


120it [00:37,  3.16it/s, loss=0.011] 

tensor(0.0110, device='cuda:0', grad_fn=<AddBackward0>)


121it [00:38,  3.15it/s, loss=0.0105]

tensor(0.0105, device='cuda:0', grad_fn=<AddBackward0>)


122it [00:38,  3.05it/s, loss=0.0107]

tensor(0.0107, device='cuda:0', grad_fn=<AddBackward0>)


123it [00:38,  3.03it/s, loss=0.0119]

tensor(0.0119, device='cuda:0', grad_fn=<AddBackward0>)


124it [00:39,  3.11it/s, loss=0.011] 

tensor(0.0110, device='cuda:0', grad_fn=<AddBackward0>)


125it [00:39,  3.01it/s, loss=0.0106]

tensor(0.0106, device='cuda:0', grad_fn=<AddBackward0>)


In [None]:
# Run training


In [None]:
def validation(model, loss_fn):
    model.eval()
    with torch.no_grad():
        loss = 0
        for i, x in enumerate(validation_loader):
            x = x.to(device).view(-1, INPUT_DIM)
            x_reconst, _, mu, sigma = model(x)
            reconst_loss = loss_fn(x_reconst, x, mu, sigma)
            kl_div = - torch.sum(1 + torch.log(sigma.pow(2)) - mu.pow(2) - sigma.pow(2))
            loss = loss + (reconst_loss + kl_div).item()
        print("Validation loss: ", loss/len(validation_loader.dataset))


In [None]:
# validation(model, model.loss_function)

In [None]:
def test_inference():
    
    import numpy as np
    import matplotlib.pyplot as plt
    
    image = Image.open("data/img_align_celeba/005002.jpg")
    transform=transforms.ToTensor()

    encodings = []
    with torch.no_grad():
        mu, sigma = model.encode(transform(image).unsqueeze(0).to(device))
        encodings.append((mu, sigma))

    mu, sigma = encodings[0]

    epsilon = torch.randn_like(sigma)
    z = mu + sigma * epsilon
    out = model.decode(z)
    out = out.view(-1, 3, 224, 192)
    
    out = out.cpu().detach().numpy()
    out = np.transpose(out, (0, 2, 3, 1))
    plt.imshow(out[0])
    #save_image(out, f"generated_ex.png")

test_inference()

In [None]:
def generate_new():
    import numpy as np
    import matplotlib.pyplot as plt
    z = torch.randn(1, Z_DIM).to(device)
    out = model.decode(z)
    out = out.view(-1, 3, 224, 192)
    #save_image(out, f"generated_ex.png")
    # change to PIL and plot
    out = out.cpu().detach().numpy()
    out = np.transpose(out, (0, 2, 3, 1))
    plt.imshow(out[0])

generate_new()
