# Autoencoding Variational Inference For Topic Models 

Paper: https://arxiv.org/pdf/1703.01488.pdf

Code in TF: https://akashgit.github.io/autoencoding_vi_for_topic_models/

I use the reimplementation in pytorch: https://github.com/hyqneuron/pytorch-avitm/blob/master/pytorch_model.py

## Load data

In [10]:
import scipy.sparse
import scanpy as sc
import numpy as np

In [2]:
outdir = "/home/jovyan/data/lung_adult_scATAC/"
experiment_prefix = 'lungAdult_'

In [5]:
adata = sc.read_h5ad(outdir + experiment_prefix + '_ATAC_raw.h5ad')

### ProdLDA implementation

In [6]:
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.nn import Parameter
import torch.nn.functional as F
import math


class ProdLDA(nn.Module):

    def __init__(self, net_arch):
        super(ProdLDA, self).__init__()
        ac = net_arch
        self.net_arch = net_arch
        # encoder
        self.en1_fc     = nn.Linear(ac.num_input, ac.en1_units)             # 1995 -> 100
        self.en2_fc     = nn.Linear(ac.en1_units, ac.en2_units)             # 100  -> 100
        self.en2_drop   = nn.Dropout(0.2)
        self.mean_fc    = nn.Linear(ac.en2_units, ac.num_topic)             # 100  -> 50
        self.mean_bn    = nn.BatchNorm1d(ac.num_topic)                      # bn for mean
        self.logvar_fc  = nn.Linear(ac.en2_units, ac.num_topic)             # 100  -> 50
        self.logvar_bn  = nn.BatchNorm1d(ac.num_topic)                      # bn for logvar
        # z
        self.p_drop     = nn.Dropout(0.2)
        # decoder
        self.decoder    = nn.Linear(ac.num_topic, ac.num_input)             # 50   -> 1995
        self.decoder_bn = nn.BatchNorm1d(ac.num_input)                      # bn for decoder
        # prior mean and variance as constant buffers
        prior_mean   = torch.Tensor(1, ac.num_topic).fill_(0)
        prior_var    = torch.Tensor(1, ac.num_topic).fill_(ac.variance)
        prior_logvar = prior_var.log()
        self.register_buffer('prior_mean',    prior_mean)
        self.register_buffer('prior_var',     prior_var)
        self.register_buffer('prior_logvar',  prior_logvar)
        # initialize decoder weight
        if ac.init_mult != 0:
            #std = 1. / math.sqrt( ac.init_mult * (ac.num_topic + ac.num_input))
            self.decoder.weight.data.uniform_(0, ac.init_mult)
        # remove BN's scale parameters
        self.logvar_bn .register_parameter('weight', None)
        self.mean_bn   .register_parameter('weight', None)
        self.decoder_bn.register_parameter('weight', None)
        self.decoder_bn.register_parameter('weight', None)

    def forward(self, input, compute_loss=False, avg_loss=True):
        # compute posterior
        en1 = F.softplus(self.en1_fc(input))                            # en1_fc   output
        en2 = F.softplus(self.en2_fc(en1))                              # encoder2 output
        en2 = self.en2_drop(en2)
        posterior_mean   = self.mean_bn  (self.mean_fc  (en2))          # posterior mean
        posterior_logvar = self.logvar_bn(self.logvar_fc(en2))          # posterior log variance
        posterior_var    = posterior_logvar.exp()
        # take sample
        eps = Variable(input.data.new().resize_as_(posterior_mean.data).normal_()) # noise
        z = posterior_mean + posterior_var.sqrt() * eps                 # reparameterization
        p = F.softmax(z)                                                # mixture probability
        p = self.p_drop(p)
        # do reconstruction
        recon = F.softmax(self.decoder_bn(self.decoder(p)))             # reconstructed distribution over vocabulary

        if compute_loss:
            return recon, self.loss(input, recon, posterior_mean, posterior_logvar, posterior_var, avg_loss)
        else:
            return recon

    def loss(self, input, recon, posterior_mean, posterior_logvar, posterior_var, avg=True):
        # NL
        NL  = -(input * (recon+1e-10).log()).sum(1)
        # KLD, see Section 3.3 of Akash Srivastava and Charles Sutton, 2017, 
        # https://arxiv.org/pdf/1703.01488.pdf
        prior_mean   = Variable(self.prior_mean).expand_as(posterior_mean)
        prior_var    = Variable(self.prior_var).expand_as(posterior_mean)
        prior_logvar = Variable(self.prior_logvar).expand_as(posterior_mean)
        var_division    = posterior_var  / prior_var
        diff            = posterior_mean - prior_mean
        diff_term       = diff * diff / prior_var
        logvar_division = prior_logvar - posterior_logvar
        # put KLD together
        KLD = 0.5 * ( (var_division + diff_term + logvar_division).sum(1) - self.net_arch.num_topic )
        # loss
        loss = (NL + KLD)
        # in traiming mode, return averaged loss. In testing mode, return individual loss
        if avg:
            return loss.mean()
        else:
            return loss

In [7]:
# import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable, Function
import torch.cuda
from pprint import pprint, pformat
import pickle
import argparse
import os
import math
import matplotlib.pyplot as plt

# parser = argparse.ArgumentParser()
# parser.add_argument('-f', '--en1-units',        type=int,   default=100)
# parser.add_argument('-s', '--en2-units',        type=int,   default=100)
# parser.add_argument('-t', '--num-topic',        type=int,   default=50)
# parser.add_argument('-b', '--batch-size',       type=int,   default=200)
# parser.add_argument('-o', '--optimizer',        type=str,   default='Adam')
# parser.add_argument('-r', '--learning-rate',    type=float, default=0.002)
# parser.add_argument('-m', '--momentum',         type=float, default=0.99)
# parser.add_argument('-e', '--num-epoch',        type=int,   default=80)
# parser.add_argument('-q', '--init-mult',        type=float, default=1.0)    # multiplier in initialization of decoder weight
# parser.add_argument('-v', '--variance',         type=float, default=0.995)  # default variance in prior normal
# parser.add_argument('--start',                  action='store_true')        # start training at invocation
# parser.add_argument('--nogpu',                  action='store_true')        # do not use GPU acceleration

# args = parser.parse_args()

nogpu = True

# # default to use GPU, but have to check if GPU exists
# if not nogpu:
#     if torch.cuda.device_count() == 0:
#         args.nogpu = True

def to_onehot(data, min_length):
    return np.bincount(data, minlength=min_length)

def make_data():
    global data_tr, data_te, tensor_tr, tensor_te, vocab, vocab_size
    dataset_tr = 'data/20news_clean/train.txt.npy'
    data_tr = np.load(dataset_tr)
    dataset_te = 'data/20news_clean/test.txt.npy'
    data_te = np.load(dataset_te)
    vocab = 'data/20news_clean/vocab.pkl'
    vocab = pickle.load(open(vocab,'r'))
    vocab_size=len(vocab)
    #--------------convert to one-hot representation------------------
    print('Converting data to one-hot representation')
    data_tr = np.array([to_onehot(doc.astype('int'),vocab_size) for doc in data_tr if np.sum(doc)!=0])
    data_te = np.array([to_onehot(doc.astype('int'),vocab_size) for doc in data_te if np.sum(doc)!=0])
    #--------------print the data dimentions--------------------------
    print('Data Loaded')
    print('Dim Training Data',data_tr.shape)
    print('Dim Test Data',data_te.shape)
    #-------------make tensor datasets-------------------------------
    tensor_tr = torch.from_numpy(data_tr).float()
    tensor_te = torch.from_numpy(data_te).float()
    if not nogpu:
        tensor_tr = tensor_tr.cuda()
        tensor_te = tensor_te.cuda()

#### Make data

In [44]:
data = adata[0:5000].layers["binary_raw"].toarray()
data = data[:,np.array(data.sum(0)!=0)]
data_tr_coo = scipy.sparse.coo_matrix(data)

In [45]:
values = data_tr_coo.data
indices = np.vstack((data_tr_coo.row, data_tr_coo.col))

i = torch.LongTensor(indices)
v = torch.FloatTensor(values)
shape = data_tr_coo.shape

tensor_tr = torch.sparse.FloatTensor(i, v, torch.Size(shape)).to_dense()

In [46]:
# tensor_tr = torch.from_numpy(data_tr).float()
# tensor_te = torch.from_numpy(data_te).float()
if not nogpu:
    tensor_tr = tensor_tr.cuda()
#     tensor_te = tensor_te.cuda()

In [47]:
class Dict2Obj(object):
    """
    Turns a dictionary into a class
    """
    #----------------------------------------------------------------------
    def __init__(self, dictionary):
        """Constructor"""
        for key in dictionary:
            setattr(self, key, dictionary[key])
        
    
#----------------------------------------------------------------------
if __name__ == "__main__":
    ball_dict = {"color":"blue",
                 "size":"8 inches",
                 "material":"rubber"}
    ball = Dict2Obj(ball_dict)

In [48]:

# parser.add_argument('-f', '--en1-units',        type=int,   default=100)
# parser.add_argument('-s', '--en2-units',        type=int,   default=100)
# parser.add_argument('-t', '--num-topic',        type=int,   default=50)
# parser.add_argument('-b', '--batch-size',       type=int,   default=200)
# parser.add_argument('-o', '--optimizer',        type=str,   default='Adam')
# parser.add_argument('-r', '--learning-rate',    type=float, default=0.002)
# parser.add_argument('-m', '--momentum',         type=float, default=0.99)
# parser.add_argument('-e', '--num-epoch',        type=int,   default=80)
# parser.add_argument('-q', '--init-mult',        type=float, default=1.0)    # multiplier in initialization of decoder weight
# parser.add_argument('-v', '--variance',         type=float, default=0.995)  # default variance in prior normal
# parser.add_argument('--start',                  action='store_true')        # start training at invocation
# parser.add_argument('--nogpu',                  action='store_true')        # do not use GPU acceleration


args_dict = {'en1_units':100, 'en2_units':100, 'num_topic':30, 'num_input':1, 
            'variance':0.995, "init_mult":1, 'optimizer':'Adam', 'learning_rate':0.01,
            "momentum":0.99, 'batch_size':200, "num_epoch":50}

In [49]:
args = Dict2Obj(args_dict)

In [50]:
net_arch = Dict2Obj(args_dict)

#### Make model

In [51]:
# net_arch = {'en1_units':100, 'en2_units':100, 'num_topic':30, 'num_input':1}
net_arch.num_input = data_tr_coo.shape[1]
model = ProdLDA(net_arch)
if not nogpu:
    model = model.cuda()

        
# make_model()

#### Make optimizer

In [None]:
# def make_optimizer():
#     global optimizer
if args.optimizer == 'Adam':
    optimizer = torch.optim.Adam(model.parameters(), args.learning_rate, betas=(args.momentum, 0.999))
elif args.optimizer == 'SGD':
    optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum)
else:
    assert False, 'Unknown optimizer {}'.format(args.optimizer)

#### Train

In [58]:

for epoch in range(args.num_epoch):
    all_indices = torch.randperm(tensor_tr.size(0)).split(args.batch_size)
    loss_epoch = 0.0
    model.train()                   # switch to training mode
    for batch_indices in all_indices:
        if not nogpu: batch_indices = batch_indices.cuda()
        input = Variable(tensor_tr[batch_indices])
        recon, loss = model(input, compute_loss=True)
        # optimize
        optimizer.zero_grad()       # clear previous gradients
        loss.backward()             # backprop
        optimizer.step()            # update parameters
        # report
        loss_epoch += loss.data[0]    # add loss to loss_epoch
    if epoch % 5 == 0:
        print('Epoch {}, loss={}'.format(epoch, loss_epoch / len(all_indices)))



RuntimeError: tensor does not have a device (device at /tmp/pip-req-build-w9kte7xz/c10/core/TensorImpl.h:463)
frame #0: c10::Error::Error(c10::SourceLocation, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&) + 0x6d (0x7f94496cfead in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/torch/lib/libc10.so)
frame #1: <unknown function> + 0x12d3f7 (0x7f9447b6b3f7 in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/torch/lib/libtorch_python.so)
frame #2: at::Tensor::options() const + 0x6f (0x7f9447b73c1f in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/torch/lib/libtorch_python.so)
frame #3: at::native::empty_like(at::Tensor const&, c10::TensorOptions const&, c10::optional<c10::MemoryFormat>) + 0x46 (0x7f9443f48f06 in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so)
frame #4: <unknown function> + 0xe3ff52 (0x7f944426bf52 in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so)
frame #5: <unknown function> + 0xe3a7b3 (0x7f94442667b3 in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so)
frame #6: <unknown function> + 0x9d2821 (0x7f9443dfe821 in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so)
frame #7: <unknown function> + 0x9d493c (0x7f9443e0093c in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so)
frame #8: at::native::batch_norm_backward_cpu(at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, bool, double, std::array<bool, 3ul>) + 0x12b (0x7f9443e00fdb in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so)
frame #9: <unknown function> + 0xd7502d (0x7f94441a102d in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so)
frame #10: <unknown function> + 0xd5d82c (0x7f944418982c in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so)
frame #11: <unknown function> + 0x287f7c8 (0x7f9445cab7c8 in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so)
frame #12: <unknown function> + 0xd5d82c (0x7f944418982c in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so)
frame #13: <unknown function> + 0x25ba6a1 (0x7f94459e66a1 in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so)
frame #14: torch::autograd::generated::NativeBatchNormBackward::apply(std::vector<at::Tensor, std::allocator<at::Tensor> >&&) + 0x36e (0x7f94459e6c8e in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so)
frame #15: <unknown function> + 0x2ad9797 (0x7f9445f05797 in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so)
frame #16: torch::autograd::Engine::evaluate_function(std::shared_ptr<torch::autograd::GraphTask>&, torch::autograd::Node*, torch::autograd::InputBuffer&) + 0x172b (0x7f9445f0f06b in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so)
frame #17: torch::autograd::Engine::thread_main(std::shared_ptr<torch::autograd::GraphTask> const&, bool) + 0x415 (0x7f9445f0fe95 in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so)
frame #18: torch::autograd::Engine::thread_init(int) + 0x4b (0x7f9445f04b1b in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so)
frame #19: torch::autograd::python::PythonEngine::thread_init(int) + 0x4a (0x7f9447f1cdaa in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/torch/lib/libtorch_python.so)
frame #20: <unknown function> + 0xc9067 (0x7f9584a9f067 in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/zmq/backend/cython/../../../../.././libstdc++.so.6)
frame #21: <unknown function> + 0x76db (0x7f95877d46db in /lib/x86_64-linux-gnu/libpthread.so.0)
frame #22: clone + 0x3f (0x7f95874fd88f in /lib/x86_64-linux-gnu/libc.so.6)


In [54]:
train()



RuntimeError: tensor does not have a device (device at /tmp/pip-req-build-w9kte7xz/c10/core/TensorImpl.h:463)
frame #0: c10::Error::Error(c10::SourceLocation, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&) + 0x6d (0x7f94496cfead in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/torch/lib/libc10.so)
frame #1: <unknown function> + 0x12d3f7 (0x7f9447b6b3f7 in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/torch/lib/libtorch_python.so)
frame #2: at::Tensor::options() const + 0x6f (0x7f9447b73c1f in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/torch/lib/libtorch_python.so)
frame #3: at::native::empty_like(at::Tensor const&, c10::TensorOptions const&, c10::optional<c10::MemoryFormat>) + 0x46 (0x7f9443f48f06 in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so)
frame #4: <unknown function> + 0xe3ff52 (0x7f944426bf52 in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so)
frame #5: <unknown function> + 0xe3a7b3 (0x7f94442667b3 in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so)
frame #6: <unknown function> + 0x9d2821 (0x7f9443dfe821 in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so)
frame #7: <unknown function> + 0x9d493c (0x7f9443e0093c in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so)
frame #8: at::native::batch_norm_backward_cpu(at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, bool, double, std::array<bool, 3ul>) + 0x12b (0x7f9443e00fdb in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so)
frame #9: <unknown function> + 0xd7502d (0x7f94441a102d in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so)
frame #10: <unknown function> + 0xd5d82c (0x7f944418982c in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so)
frame #11: <unknown function> + 0x287f7c8 (0x7f9445cab7c8 in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so)
frame #12: <unknown function> + 0xd5d82c (0x7f944418982c in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so)
frame #13: <unknown function> + 0x25ba6a1 (0x7f94459e66a1 in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so)
frame #14: torch::autograd::generated::NativeBatchNormBackward::apply(std::vector<at::Tensor, std::allocator<at::Tensor> >&&) + 0x36e (0x7f94459e6c8e in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so)
frame #15: <unknown function> + 0x2ad9797 (0x7f9445f05797 in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so)
frame #16: torch::autograd::Engine::evaluate_function(std::shared_ptr<torch::autograd::GraphTask>&, torch::autograd::Node*, torch::autograd::InputBuffer&) + 0x172b (0x7f9445f0f06b in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so)
frame #17: torch::autograd::Engine::thread_main(std::shared_ptr<torch::autograd::GraphTask> const&, bool) + 0x415 (0x7f9445f0fe95 in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so)
frame #18: torch::autograd::Engine::thread_init(int) + 0x4b (0x7f9445f04b1b in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/torch/lib/libtorch_cpu.so)
frame #19: torch::autograd::python::PythonEngine::thread_init(int) + 0x4a (0x7f9447f1cdaa in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/torch/lib/libtorch_python.so)
frame #20: <unknown function> + 0xc9067 (0x7f9584a9f067 in /home/jovyan/my-conda-envs/emma_env/lib/python3.7/site-packages/zmq/backend/cython/../../../../.././libstdc++.so.6)
frame #21: <unknown function> + 0x76db (0x7f95877d46db in /lib/x86_64-linux-gnu/libpthread.so.0)
frame #22: clone + 0x3f (0x7f95874fd88f in /lib/x86_64-linux-gnu/libc.so.6)


In [None]:

def identify_topic_in_line(line):
    topics = []
    for topic, keywords in associations.iteritems():
        for word in keywords:
            if word in line:
                topics.append(topic)
                break
    return topics

def print_top_words(beta, feature_names, n_top_words=10):
    print '---------------Printing the Topics------------------'
    for i in range(len(beta)):
        line = " ".join([feature_names[j] 
                            for j in beta[i].argsort()[:-n_top_words - 1:-1]])
        topics = identify_topic_in_line(line)
        print('|'.join(topics))
        print('     {}'.format(line))
    print '---------------End of Topics------------------'

def print_perp(model):
    cost=[]
    model.eval()                        # switch to testing mode
    input = Variable(tensor_te)
    recon, loss = model(input, compute_loss=True, avg_loss=False)
    loss = loss.data
    counts = tensor_te.sum(1)
    avg = (loss / counts).mean()
    print('The approximated perplexity is: ', math.exp(avg))

def visualize():
    global recon
    input = Variable(tensor_te[:10])
    register_vis_hooks(model)
    recon = model(input, compute_loss=False)
    remove_vis_hooks()
    save_visualization('pytorch_model', 'png')

if __name__=='__main__' and args.start:
    make_data()
    make_model()
    make_optimizer()
    train()
    emb = model.decoder.weight.data.cpu().numpy().T
    print_top_words(emb, zip(*sorted(vocab.items(), key=lambda x:x[1]))[0])
    print_perp(model)
    visualize()