In [1]:
    import torch
    from sebm_mnist.data import load_data
#     from sebm_mnist.objectives import mle
    from sebm_mnist.modules.sgld import SGLD_sampler
    from sebm_mnist.modules.data_noise import DATA_NOISE_sampler
    from sebm_mnist.modules.energy_function import Energy_function
    
    CUDA = torch.cuda.is_available()
    if CUDA:
        DEVICE = torch.device('cuda:0')
    print('torch:', torch.__version__, 'CUDA:', CUDA)
    # optimization hyper-parameters
    num_epochs = 100
    sample_size = 100
    batch_size = 100
    lr = 1e-4
    ## model hyper-parameters
    D = 2 # data point dimensions
    hidden_dim = 400
    pixels_dim = 28*28
    latents_dim = 10
    reparameterized = False
    optimize_priors = False
    ## EBM hyper-parameters
    data_noise_std = 0.1
    sgld_num_steps = 2
    sgld_step_size = 1
    sgld_init_sample_std = 0.1
    sgld_noise_std = 0.01
    SAVE_VERSION = 'ebm-v1' 
    
    ## data directory
    print('Load MNIST dataset...')
    DATA_DIR = '/home/hao/Research/sebm_data/'
    train_data, test_data = load_data(DATA_DIR, batch_size)
    
    print('Initialize energy function and optimizer...')
    ef = Energy_function(pixels_dim, hidden_dim)
    if CUDA:
        ef.cuda().to(DEVICE)   
    optimizer = torch.optim.Adam(list(ef.parameters()), lr=lr, betas=(0.9, 0.99))
    
    print('Initialize SGLD sampler...')
    sgld_sampler = SGLD_sampler(sgld_init_sample_std, sgld_noise_std, CUDA, DEVICE)
    
    print('Initialize data noise sampler...')
    data_noise_sampler = DATA_NOISE_sampler(data_noise_std, CUDA, DEVICE)
#     data_noise_sampler = None

torch: 1.3.0 CUDA: True
Load MNIST dataset...
Initialize energy function and optimizer...
Initialize SGLD sampler...
Initialize data noise sampler...


In [2]:
for b, (images, _) in enumerate(train_data):
    break

In [3]:
pixels_size = int(images.shape[-1]*images.shape[-2])
batch_size = images.shape[0]
optimizer.zero_grad()
images = images.squeeze(1).view(-1, pixels_size).repeat(sample_size, 1, 1)
if CUDA:
    images = images.cuda().to(DEVICE)
if data_noise_sampler is not None: ## add Gaussian noise to true data images
    data_noise = data_noise_sampler.sample(sample_size, batch_size, pixels_size)
    assert images.shape == data_noise.shape, "ERROR! data noise have unexpected shape."
    images = images + data_noise

In [34]:
images.requires_grad = True
E = ef.forward(images)
grad_a = torch.autograd.grad(outputs=E.sum(), inputs=images)

In [35]:
ef.forward(images).sum().backward()

In [None]:
grad_b = images.grad

In [None]:
grad_a[0] - grad_b

In [38]:
import time
images.requires_grad = True

for i in range(10):
    E = ef.forward(images)
    grads = torch.autograd.grad(outputs=E.sum(), inputs=images)
    images = (images - (1 / 2) * grads[0] + 0.1).detach()
    time.sleep(2)

RuntimeError: One of the differentiated Tensors does not require grad

In [None]:
list(ef.parameters())[0].grad

In [None]:
images = (images - (1 / 2) * images.grad + 0.1).detach()

In [None]:
images.requires_grad

In [None]:
E = ef.forward(images)
E.sum().backward()

In [None]:
images.grad

In [None]:
a = torch.Tensor([1, 2])

In [None]:
a.requires_grad = True

In [None]:
a.sum().backward()

In [None]:
a.grad

In [None]:
opt = torch.optim.SGD([a], lr=1)

In [None]:
opt.step()

In [None]:
a