In [1]:
import math
import torch
import hess
import matplotlib.pyplot as plt
from hess.nets import Transformer
from hess.data import data_loader
import numpy as np
import sklearn.datasets as datasets
import hess.utils as utils

In [None]:
def twospirals(n_points, noise=.2, random_state=88):
    """
     Returns the two spirals dataset.
    """
    n = np.sqrt(np.random.rand(n_points,1)) * 600 * (2*np.pi)/360
    d1x = -1.5*np.cos(n)*n + np.random.randn(n_points,1) * noise
    d1y =  1.5*np.sin(n)*n + np.random.randn(n_points,1) * noise
    return (np.vstack((np.hstack((d1x,d1y)),np.hstack((-d1x,-d1y)))),
            np.hstack((np.zeros(n_points),np.ones(n_points))))

In [None]:
X, Y = twospirals(500, noise=1.5)
noisy_x = torch.FloatTensor(X)
noisy_y = torch.FloatTensor(Y)

X, Y = twospirals(500, noise=0.5)
sep_x = torch.FloatTensor(X)
sep_y = torch.FloatTensor(Y).unsqueeze(-1)

use_cuda = torch.cuda.is_available()
if use_cuda:
    torch.cuda.set_device(4)
    torch.set_default_tensor_type(torch.cuda.FloatTensor)
    noisy_x, noisy_y = noisy_x.cuda(), noisy_y.cuda()
    sep_x, sep_y = sep_x.cuda(), sep_y.cuda()

In [None]:
noise_model = Transformer(noisy_x, noisy_y, net=hess.nets.MoonNet, n_hidden=5, hidden_size=20,
                     activation=torch.nn.ELU(), bias=True)

In [None]:
sep_model = Transformer(sep_x, sep_y, net=hess.nets.MoonNet, n_hidden=5, hidden_size=20,
                     activation=torch.nn.ELU(), bias=True)

In [7]:
if use_cuda:
    noise_model = noise_model.cuda()
    sep_model = sep_model.cuda()

In [8]:
%pdb

Automatic pdb calling has been turned ON


In [None]:
noise_model.train_net(print_loss=True, lr=0.01, iters=5000,
                 loss_func=torch.nn.functional.binary_cross_entropy)

In [8]:
sep_model.train_net(print_loss=True, lr=0.01, iters=5000,
                 loss_func=torch.nn.functional.binary_cross_entropy)

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)


tensor(0.6966, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.6630, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.6436, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.6472, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.6456, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.6376, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.6342, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.6355, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.6355, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.6319, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.6265, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.6227, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.6216, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.6196, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.6141, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.6083, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.6050, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.6009, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.5912, grad_fn=<BinaryCrossEntropyBack

tensor(0.0039, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0039, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0039, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0039, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0039, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0039, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0039, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0039, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0039, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0039, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0038, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0038, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0038, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0038, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0038, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0038, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0038, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0038, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0038, grad_fn=<BinaryCrossEntropyBack

tensor(0.0019, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0019, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0019, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0019, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0019, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0019, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0018, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0018, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0018, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0018, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0018, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0018, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0017, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0017, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0017, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0017, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0017, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0017, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0016, grad_fn=<BinaryCrossEntropyBack

tensor(0.0003, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0003, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0003, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0003, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0002, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0002, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0002, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0002, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0002, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0002, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0002, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0002, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0002, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0002, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0002, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0002, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0002, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0002, grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0002, grad_fn=<BinaryCrossEntropyBack

tensor(6.5568e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(6.5156e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(6.4750e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(6.4347e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(6.3948e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(6.3552e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(6.3161e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(6.2772e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(6.2388e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(6.2007e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(6.1630e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(6.1256e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(6.0885e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(6.0518e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(6.0155e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.9793e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.9437e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.9083e-05, grad_fn=<Bin

tensor(2.8744e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.8634e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.8527e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.8420e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.8312e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.8206e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.8101e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.7996e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.7891e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.7787e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.7685e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.7582e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.7480e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.7379e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.7277e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.7177e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.7077e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.6978e-05, grad_fn=<Bin

tensor(1.6926e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.6878e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.6831e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.6783e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.6737e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.6690e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.6643e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.6596e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.6550e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.6504e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.6458e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.6413e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.6367e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.6322e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.6277e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.6232e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.6187e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.6143e-05, grad_fn=<Bin

tensor(1.1702e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.1675e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.1649e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.1622e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.1596e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.1569e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.1542e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.1517e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.1490e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.1463e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.1437e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.1411e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.1385e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.1359e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.1334e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.1308e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.1283e-05, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.1257e-05, grad_fn=<Bin

tensor(8.1912e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(8.1751e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(8.1591e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(8.1438e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(8.1287e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(8.1129e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(8.0977e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(8.0820e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(8.0666e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(8.0515e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(8.0361e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(8.0211e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(8.0054e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(7.9907e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(7.9756e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(7.9601e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(7.9445e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(7.9300e-06, grad_fn=<Bin

tensor(6.0519e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(6.0420e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(6.0318e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(6.0221e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(6.0120e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(6.0021e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.9916e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.9821e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.9723e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.9626e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.9528e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.9431e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.9330e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.9236e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.9138e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.9038e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.8944e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.8846e-06, grad_fn=<Bin

tensor(4.6828e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.6760e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.6696e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.6623e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.6557e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.6485e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.6418e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.6351e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.6282e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.6219e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.6145e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.6080e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.6006e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.5938e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.5870e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.5806e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.5741e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.5671e-06, grad_fn=<Bin

tensor(3.7441e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.7389e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.7337e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.7289e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.7236e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.7190e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.7140e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.7090e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.7044e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.6991e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.6945e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.6893e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.6846e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.6800e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.6749e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.6701e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.6653e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.6603e-06, grad_fn=<Bin

tensor(3.1088e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.1049e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.1007e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.0972e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.0930e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.0897e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.0856e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.0817e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.0779e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.0747e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.0705e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.0668e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.0624e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.0588e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.0556e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.0519e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.0479e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.0440e-06, grad_fn=<Bin

tensor(2.6160e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.6125e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.6098e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.6068e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.6033e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.6005e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.5975e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.5943e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.5911e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.5877e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.5847e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.5816e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.5788e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.5757e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.5728e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.5699e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.5668e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.5639e-06, grad_fn=<Bin

tensor(2.2206e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.2183e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.2161e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.2137e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.2113e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.2088e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.2064e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.2036e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.2016e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.1989e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.1967e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.1943e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.1918e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.1893e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.1871e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.1847e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.1825e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.1804e-06, grad_fn=<Bin

tensor(1.8840e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.8819e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.8799e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.8781e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.8760e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.8741e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.8721e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.8699e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.8681e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.8663e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.8643e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.8623e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.8605e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.8583e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.8567e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.8546e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.8525e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.8510e-06, grad_fn=<Bin

tensor(1.5886e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.5868e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.5851e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.5834e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.5821e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.5806e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.5790e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.5774e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.5759e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.5746e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.5731e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.5718e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.5704e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.5689e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.5671e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.5658e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.5642e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.5628e-06, grad_fn=<Bin

tensor(1.3835e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.3823e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.3813e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.3798e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.3786e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.3776e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.3764e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.3753e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.3737e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.3726e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.3710e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.3699e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.3687e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.3678e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.3660e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.3648e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.3636e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.3623e-06, grad_fn=<Bin

tensor(1.1865e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.1852e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.1841e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.1833e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.1824e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.1812e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.1802e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.1792e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.1780e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.1769e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.1759e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.1748e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.1739e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.1729e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.1719e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.1710e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.1698e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.1690e-06, grad_fn=<Bin

tensor(1.0245e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.0236e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.0228e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.0219e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.0209e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.0200e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.0192e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.0186e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.0176e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.0167e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.0160e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.0153e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.0143e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.0135e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.0125e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.0117e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.0110e-06, grad_fn=<BinaryCrossEntropyBackward>)
tensor(1.0101e-06, grad_fn=<Bin

tensor(8.8422e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(8.8374e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(8.8297e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(8.8219e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(8.8148e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(8.8040e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(8.7945e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(8.7849e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(8.7796e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(8.7742e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(8.7659e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(8.7599e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(8.7522e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(8.7462e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(8.7390e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(8.7301e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(8.7259e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(8.7206e-07, grad_fn=<Bin

tensor(7.6667e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(7.6601e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(7.6553e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(7.6506e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(7.6416e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(7.6369e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(7.6309e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(7.6243e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(7.6166e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(7.6118e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(7.6059e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(7.6005e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(7.5910e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(7.5868e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(7.5820e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(7.5755e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(7.5707e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(7.5665e-07, grad_fn=<Bin

tensor(6.6873e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(6.6813e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(6.6730e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(6.6670e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(6.6628e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(6.6575e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(6.6521e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(6.6491e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(6.6438e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(6.6402e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(6.6360e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(6.6277e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(6.6229e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(6.6187e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(6.6134e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(6.6086e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(6.6038e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(6.5985e-07, grad_fn=<Bin

tensor(5.9934e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.9875e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.9833e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.9767e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.9744e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.9714e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.9678e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.9630e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.9583e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.9529e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.9481e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.9434e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.9404e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.9362e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.9308e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.9267e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.9243e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.9207e-07, grad_fn=<Bin

tensor(5.3777e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.3735e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.3711e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.3646e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.3598e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.3550e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.3526e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.3485e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.3431e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.3419e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.3365e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.3306e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.3270e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.3234e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.3187e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.3169e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.3145e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(5.3109e-07, grad_fn=<Bin

tensor(4.8394e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.8352e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.8329e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.8281e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.8233e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.8221e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.8180e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.8132e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.8096e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.8078e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.8019e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.8013e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.7983e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.7965e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.7893e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.7876e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.7834e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.7786e-07, grad_fn=<Bin

tensor(4.2827e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.2779e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.2761e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.2702e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.2678e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.2666e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.2618e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.2606e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.2577e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.2559e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.2505e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.2469e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.2445e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.2433e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.2410e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.2386e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.2338e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(4.2320e-07, grad_fn=<Bin

tensor(3.8130e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.8118e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.8064e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.8046e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.8023e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.8005e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.7987e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.7951e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.7945e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.7897e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.7891e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.7874e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.7850e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.7838e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.7802e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.7796e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.7754e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.7736e-07, grad_fn=<Bin

tensor(3.3969e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.3957e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.3922e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.3910e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.3892e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.3880e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.3874e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.3832e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.3820e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.3796e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.3767e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.3755e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.3719e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.3707e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.3683e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.3653e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.3641e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.3624e-07, grad_fn=<Bin

tensor(3.0214e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.0190e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.0184e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.0154e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.0148e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.0125e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.0119e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.0077e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.0071e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.0059e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.0047e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.0029e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(3.0017e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.9982e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.9958e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.9916e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.9910e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.9886e-07, grad_fn=<Bin

tensor(2.7264e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.7222e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.7216e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.7180e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.7156e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.7150e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.7144e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.7132e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.7103e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.7097e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.7073e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.7073e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.7037e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.7013e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.7013e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.6989e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.6971e-07, grad_fn=<BinaryCrossEntropyBackward>)
tensor(2.6948e-07, grad_fn=<Bin

## Look at Spectra of Both Models

In [11]:
n_par = sum(p.numel() for p in noise_model.net.parameters())

In [17]:
noise_hessian = torch.zeros(n_par, n_par).cpu()
for pp in range(n_par):
    base_vec = torch.zeros(n_par).cpu().unsqueeze(0)
    base_vec[0, pp] = 1.

    base_vec = utils.unflatten_like(base_vec, noise_model.net.parameters())
    utils.eval_hess_vec_prod(base_vec, noise_model.net.parameters(),
                            net=noise_model.net.cpu(),
                            criterion=torch.nn.BCELoss(),
                            inputs=noisy_x.cpu(), targets=noisy_y.cpu())
    if pp == 0:
        output = utils.gradtensor_to_tensor(noise_model.net, include_bn=True)
        noise_hessian = torch.zeros(output.nelement(), output.nelement())
        noise_hessian[:, pp] = output
    noise_hessian[:, pp] = utils.gradtensor_to_tensor(noise_modey.net.cpu(), include_bn=True).cpu()


RuntimeError: derivative for binary_cross_entropy_backward is not implemented

In [1]:
sep_hessian = torch.zeros(n_par, n_par).cpu()
for pp in range(n_par):
    base_vec = torch.zeros(n_par_w_bias).cpu().unsqueeze(0)
    base_vec[0, pp] = 1.

    base_vec = utils.unflatten_like(base_vec, sep_model.net.parameters())
    utils.eval_hess_vec_prod(base_vec, sep_model.net.parameters(),
                            net=sep_model.net.cpu(),
                            criterion=torch.nn.CrossEntropy(),
                            inputs=train_x.cpu(), targets=train_y.cpu())
    if pp == 0:
        output = utils.gradtensor_to_tensor(sep_model.net, include_bn=True)
        sep_hessian = torch.zeros(output.nelement(), output.nelement())
        sep_hessian[:, pp] = output
    sep_hessian[:, pp] = utils.gradtensor_to_tensor(sep_modey.net.cpu(), include_bn=True).cpu()


NameError: name 'torch' is not defined