In [1]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

In [2]:
import scipy as sp
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
import seaborn as sns

from collections import defaultdict
from itertools import groupby
from sklearn import datasets
from numpy import random
from scipy.stats import dirichlet, norm, poisson

In [3]:
# from keras.datasets import reuters, imdb

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset

In [5]:
import numpy as np
import os

In [6]:
from pathlib import Path
from collections import OrderedDict
import pickle, gzip, math, torch, matplotlib as mpl
import matplotlib.pyplot as plt

Path.ls = lambda x: list(x.iterdir())

## URSA Datasets 1K 

In [7]:
folder_ds_path = Path('./data/User Review Structure Analysis (URSA)/')
xml_path = (folder_ds_path/'Classified_Corpus.xml')
ds_path = (folder_ds_path/'1k')
sentence_npy_path = (folder_ds_path/'sentence.npy')
vocab_pkl_path = (ds_path/'vocab.pkl')
seed_words_path = (ds_path/'seed_words.txt')
train_filename = (ds_path/'train.txt.npy')

# log words not pass
aspect_tags = ['Food', 'Staff', 'Ambience']
polatiry_tags = ['Positive', 'Negative', 'Neutral']
xml_review_tag = './/Review'
log_np = [[], [], []]

# length allowed sentences
# length_allowed = [11, 7, 4]
# min_freq_allowed = -1

In [8]:
vocab2id = pickle.load(open(vocab_pkl_path, 'rb'))
vocab_size=len(vocab2id)

In [9]:
train_data = np.load((train_filename), allow_pickle=True)

In [10]:
p_sentence_list, label_list = train_data[:, 0], train_data[:, 1]

In [11]:
vocab = dict(map(reversed, vocab2id.items()))

In [12]:
vocab_size = len(vocab)

## Dataset

In [13]:
from sklearn.model_selection import train_test_split

In [14]:
x_, y_ = [], []
for p_sentence, label_ in zip(p_sentence_list, label_list): 
    x_.append(p_sentence)
    y_.append(label_)

In [15]:
len(x_) == len(y_)

True

In [16]:
train_x, test_x, train_y, test_y =  train_test_split(
    x_, y_, test_size=0.1, random_state=0)

In [17]:
print ('Data Loaded')
print ('Dim Training Data',len(train_x), vocab_size)
print ('Dim Test Data', len(test_x), vocab_size)

Data Loaded
Dim Training Data 3095 2772
Dim Test Data 344 2772


## Constants

In [18]:
bs = 200
en1_units=100
en2_units=100
num_topic=3
num_input=vocab_size
variance=0.995
init_mult=1.0
learning_rate=0.0005
batch_size=200
momentum=0.99
num_epoch=200
nogpu=True
drop_rate=0.6

## Topic Model Utility Functions

In [19]:
def read_file_seed_words(fn):
    with open(fn, "r") as fr:
        def p_string_sw(l):
            return l.replace('\n','').split(',')
        rl = [p_string_sw(l) for l in fr]
    return rl

In [20]:
seed_words = read_file_seed_words(seed_words_path)

## Toy Gamma

In [21]:
num_topic_toy = 3
batch_size_toy = 2
seed_words_toy = [['a'], ['c'], ['f']]
print (seed_words_toy)

[['a'], ['c'], ['f']]


In [22]:
vocab2id_toy = {'a': 0, 'b': 1, 'c': 2, 'd': 3, 'e': 4, 'f': 5}

In [23]:
vocab_size_toy = len(vocab2id_toy)

In [24]:
gamma_toy = np.zeros((vocab_size_toy, num_topic_toy))
print (gamma_toy)

[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


In [25]:
gamma_bin_toy = np.zeros((batch_size_toy, vocab_size_toy, num_topic_toy))
print (gamma_bin_toy)

[[[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]]


In [26]:
for k in range(len(seed_words_toy)): # number of topics
    print ("k:", k)
    for idx in range(len(seed_words_toy[k])): # number of words
        print ("idx:", idx)
        ivocab = vocab2id_toy[seed_words_toy[k][idx]]
        gamma_toy[ivocab, k] = 1.0
        gamma_bin_toy[:, ivocab, :] = 1.0

k: 0
idx: 0
k: 1
idx: 0
k: 2
idx: 0


In [27]:
gamma_toy.shape

(6, 3)

In [28]:
gamma_toy

array([[1., 0., 0.],
       [0., 0., 0.],
       [0., 1., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 1.]])

In [29]:
gamma_bin_toy.shape

(2, 6, 3)

In [30]:
gamma_bin_toy

array([[[1., 1., 1.],
        [0., 0., 0.],
        [1., 1., 1.],
        [0., 0., 0.],
        [0., 0., 0.],
        [1., 1., 1.]],

       [[1., 1., 1.],
        [0., 0., 0.],
        [1., 1., 1.],
        [0., 0., 0.],
        [0., 0., 0.],
        [1., 1., 1.]]])

In [31]:
(gamma_bin_toy == 1.) & (gamma_bin_toy == 1.) 

array([[[ True,  True,  True],
        [False, False, False],
        [ True,  True,  True],
        [False, False, False],
        [False, False, False],
        [ True,  True,  True]],

       [[ True,  True,  True],
        [False, False, False],
        [ True,  True,  True],
        [False, False, False],
        [False, False, False],
        [ True,  True,  True]]])

In [32]:
at = torch.tensor((gamma_bin_toy == 1.) & (gamma_bin_toy == 1.) )

In [33]:
at.int().float()

tensor([[[1., 1., 1.],
         [0., 0., 0.],
         [1., 1., 1.],
         [0., 0., 0.],
         [0., 0., 0.],
         [1., 1., 1.]],

        [[1., 1., 1.],
         [0., 0., 0.],
         [1., 1., 1.],
         [0., 0., 0.],
         [0., 0., 0.],
         [1., 1., 1.]]])

In [34]:
at.int().shape

torch.Size([2, 6, 3])

In [35]:
at.int().sum((1, 2))

tensor([9, 9])

## Seed words

In [36]:
print (seed_words)

[['food', 'sauc', 'chicken', 'shrimp', 'chees', 'potato', 'fri', 'tomato', 'roast', 'onion', 'pork', 'goat', 'grill', 'tuna', 'salad', 'beef', 'tapa'], ['staff', 'servic', 'friendli', 'rude', 'hostess', 'waiter', 'bartend', 'waitress', 'help', 'polit', 'bar', 'courteou', 'member', 'waitstaff', 'attitud', 'reserv', 'tip'], ['atmospher', 'scene', 'place', 'tabl', 'outsid', 'area', 'ambianc', 'outdoor', 'romant', 'cozi', 'decor', 'sit', 'wall', 'light', 'window', 'area', 'ceil', 'floor']]


In [37]:
def setup_prior(fn, n_k=3):
    gamma = torch.zeros((len(vocab),n_k))
    gamma_bin = torch.zeros((1, len(vocab),n_k))

    full_vocab = read_file_seed_words(fn)
    for k in range(len(full_vocab)):
        for idx in range(len(full_vocab[k])):
            ivocab = vocab2id[full_vocab[k][idx]]
            gamma[ivocab, k] = 1.0
            gamma_bin[:, ivocab, :] = 1.0

    return (gamma, gamma_bin)

In [38]:
def listify(o):
    if o is None: return []
    if isinstance(o, list): return o
    if isinstance(o, str): return [o]
    if isinstance(o, Iterable): return list(o)
    return [o]
def setify(o): return o if isinstance(o,set) else set(listify(o))
def compose(x, funcs, *args, order_key='_order', **kwargs):
    key = lambda o: getattr(o, order_key, 0)
    for f in sorted(listify(funcs), key=key): x = f(x, **kwargs)
    return x

In [66]:
def print_perp(model):
    cost = []
    model.eval()                        # switch to testing mode
    for x_test, y_test in test_dl:
        recon, loss = model(x_test, compute_loss=True, avg_loss=False)
        loss = loss.data
        counts = x_test.sum(1)
        cost.extend((loss / counts).data.cpu().tolist())
    print('The approximated perplexity is: ', (np.exp(np.mean(np.array(cost)))))

def print_top_words(beta, feature_names, n_top_words=10):
    print ('---------------Printing the Topics------------------')
    for i in range(len(beta)):
        line = " ".join([feature_names[j] 
                         for j in beta[i].argsort()[:-n_top_words - 1:-1]])
        print('{}'.format(line))
    print ('---------------End of Topics------------------')
    
def print_gamma(gamma, seed_words, vocab, vocab2id):
    sws = []        
    for k in range(len(seed_words)):
        for idx in range(len(seed_words[k])):
            w = seed_words[k][idx]
            sws.append((k, w))

    for idx in range(len(sws)):
        k, w = sws[idx]
        ivocab = vocab2id[w]
        mk = gamma[ivocab].argmax(-1)
        print (ivocab, w, k, mk, gamma[ivocab])

## Data Utility Functions

In [40]:
def collate(b):
    x, y = zip(*b)
    return torch.stack(x), torch.stack(y)

class IdifyAndLimitedVocab():
    _order=-1
    def __init__(self, vocab2id, limited_vocab):
        self.vocab2id = vocab2id
        self.limited_vocab = limited_vocab
    def __call__(self, item):
        idlist = [self.vocab2id[w] for w in item if self.vocab2id[w] < self.limited_vocab]
        return np.array(idlist)
    

class Numpyify():
    _order=0
    def __call__(self, item):
        return np.array(item)

class Onehotify():
    _order=1
    def __init__(self, vocab_size):
        self.vocab_size = vocab_size
    def __call__(self, item):
        return np.array(np.bincount(item.astype('int'), minlength=self.vocab_size))
    
class YToOnehot():
    _order=1
    def __init__(self, num_classes):
        self.num_classes = num_classes
    def __call__(self, item):
        categorical = np.zeros((1, self.num_classes))
        categorical[0, item] = 1
        return categorical

class Tensorify():
    _order=2
    def __call__(self, item):
        return torch.from_numpy(item)

class Floatify():
    _order=3
    def __call__(self, item):
        return item.float()
    
class CheckAndCudify():
    _order=100
    def __init__(self):
        self.ic = torch.cuda.is_available()
    def __call__(self, item):
        return item.cuda() if self.ic else item
    
class URSADataset(Dataset):
    def __init__(self, x, y, tfms_x, tfms_y): 
        self.x, self.y = x, y
        self.x_tfms = tfms_x
        self.y_tfms = tfms_y
    def __len__(self): 
        return len(self.x)
    def __getitem__(self, i): 
        return compose(self.x[i], self.x_tfms), compose(self.y[i], self.y_tfms)
    
class Sampler():
    def __init__(self, ds, bs, shuffle=False):
        self.n,self.bs,self.shuffle = len(ds),bs,shuffle
        
    def __iter__(self):
        self.idxs = torch.randperm(self.n) if self.shuffle else torch.arange(self.n)
        for i in range(0, self.n, self.bs): yield self.idxs[i:i+self.bs]

class DataLoader():
    def __init__(self, ds, sampler, collate_fn=collate):
        self.ds,self.sampler,self.collate_fn = ds,sampler,collate_fn
        
    def __iter__(self):
        for s in self.sampler: yield self.collate_fn([self.ds[i] for i in s])

## Load Data

In [41]:
num_classes = np.max(train_y) + 1

In [42]:
num_classes

3

In [43]:
tfms_x = [Numpyify(), Onehotify(vocab_size=vocab_size), Tensorify(), Floatify(), CheckAndCudify()]
tfms_y = [YToOnehot(num_classes=num_classes), Tensorify(), Floatify(), CheckAndCudify()]

In [44]:
train_ds = URSADataset(train_x, train_y, tfms_x=tfms_x, tfms_y=tfms_y)
test_ds = URSADataset(test_x, test_y, tfms_x=tfms_x, tfms_y=tfms_y)

In [45]:
train_samp = Sampler(train_ds, bs, shuffle=False)
test_samp = Sampler(test_ds, bs, shuffle=False)

In [46]:
train_dl = DataLoader(train_ds, sampler=train_samp, collate_fn=collate)
test_dl = DataLoader(test_ds, sampler=test_samp, collate_fn=collate)

In [47]:
gamma_prior = setup_prior(seed_words_path, 3)

In [48]:
gamma, gamma_bin = gamma_prior

## Define Model

In [68]:
class ProdLDA(nn.Module):
    def __init__(self, num_input, en1_units, en2_units, num_topic, drop_rate, init_mult, gamma_prior):
        super(ProdLDA, self).__init__()
        self.num_input, self.en1_units, self.en2_units, \
        self.num_topic, self.drop_rate, self.init_mult = num_input, en1_units, en2_units, \
                                                            num_topic, drop_rate, init_mult
        # gamma prior
        self.gamma_prior = gamma_prior
        
        # encoder
        self.en1_fc = nn.Linear(num_input, en1_units)
        self.en1_ac = nn.Softplus()
        self.en2_fc     = nn.Linear(en1_units, en2_units)
        self.en2_ac = nn.Softplus()
        self.en2_dr   = nn.Dropout(drop_rate)
        
        # mean, logvar
        self.mean_fc = nn.Linear(en2_units, num_topic)
        self.mean_bn = nn.BatchNorm1d(num_topic)
        self.logvar_fc = nn.Linear(en2_units, num_topic)
        self.logvar_bn = nn.BatchNorm1d(num_topic)

        # decoder
        self.de_ac1 = nn.Softmax(dim=-1)
        self.de_dr = nn.Dropout(drop_rate)
        self.de_fc = nn.Linear(num_topic, num_input)
        self.de_bn = nn.BatchNorm1d(num_input)
        self.de_ac2 = nn.Softmax(dim=-1)
        
        # prior mean and variance as constant buffers
        self.prior_mean   = torch.Tensor(1, num_topic).fill_(0)
        self.prior_var    = torch.Tensor(1, num_topic).fill_(variance)
        self.prior_mean   = nn.Parameter(self.prior_mean, requires_grad=False)
        self.prior_var    = nn.Parameter(self.prior_var, requires_grad=False)
        self.prior_logvar = nn.Parameter(self.prior_var.log(), requires_grad=False)
        # initialize decoder weight
        if init_mult != 0:
            #std = 1. / math.sqrt( init_mult * (num_topic + num_input))
            self.de_fc.weight.data.uniform_(0, init_mult)
        # remove BN's scale parameters
        for component in [self.mean_bn, self.logvar_bn, self.de_bn]:
            component.weight.requires_grad = False
            component.weight.fill_(1.0)
        
    def gamma(self):
        # this function have to run after self.encode
        encoder_w1 = self.en1_fc.weight
        encoder_b1 = self.en1_fc.bias
        encoder_w2 = self.en2_fc.weight
        encoder_b2 = self.en2_fc.bias
        mean_w = self.mean_fc.weight
        mean_b = self.mean_fc.bias
        mean_running_mean = self.mean_bn.running_mean
        mean_running_var = self.mean_bn.running_var
        logvar_w = self.logvar_fc.weight
        logvar_b = self.logvar_fc.bias
        logvar_running_mean = self.logvar_bn.running_mean
        logvar_running_var = self.logvar_bn.running_var
        
        w1 = F.softplus(encoder_w1.t() + encoder_b1)
        w2 = F.softplus(F.linear(w1, encoder_w2, encoder_b2))
        wdr = F.dropout(w2, self.drop_rate)
        wo_mean = F.softmax(F.batch_norm(F.linear(wdr, mean_w, mean_b), mean_running_mean, mean_running_var), dim=-1)
#         wo_mean = F.softmax(F.linear(wdr, mean_w, mean_b), dim=-1)
        wo_logvar = F.softmax(F.batch_norm(F.linear(wdr, logvar_w, logvar_b), logvar_running_mean, logvar_running_var), dim=-1)
        
#         print ("wo_mean: {}".format(wo_mean[0]))
#         print ("gamma_mean: {}".format(wo_mean.size()))
#         print ("gamma_logvar: {}".format(wo_logvar.size()))
        return wo_mean, wo_logvar
            
    def encode(self, input_):
        # encoder
        # ('linear1', nn.Linear(in_feature, hidden_feature1)),
        # ('act1', nn.Softplus()),
        # ('linear2', nn.Linear(hidden_feature1, hidden_feature2)),
        # ('act2', nn.Softplus()),
        # ('dropout', nn.Dropout(drop_rate))
        # encoded = self.en(input_)
        encoded1 = self.en1_fc(input_)
        encoded1_ac = self.en1_ac(encoded1)
        encoded2 = self.en2_fc(encoded1_ac)
        encoded2_ac = self.en2_ac(encoded2)
        encoded2_dr = self.en2_dr(encoded2_ac)
        
        encoded = encoded2_dr
        
        # hidden
        # ('linear', nn.Linear(in_feature, out_feature)),
        # ('batchnorm', nn.BatchNorm1d(out_feature))
        # posterior_mean = self.mean(encoded)
        # posterior_logvar = self.logvar(encoded)
        mean_theta = self.mean_fc(encoded)
        mean_theta_bn = self.mean_bn(mean_theta)
        logvar_theta = self.logvar_fc(encoded)
        logvar_theta_bn = self.logvar_bn(logvar_theta)
        
        posterior_mean = mean_theta_bn
        posterior_logvar = logvar_theta_bn
        return encoded, posterior_mean, posterior_logvar
    
    def decode(self, input_, posterior_mean, posterior_var):
        # take sample
        eps = input_.data.new().resize_as_(posterior_mean.data).normal_() # noise 
        z = posterior_mean + posterior_var.sqrt() * eps                   # reparameterization
        # do reconstruction
        # decoder
        # ('act1', nn.Softmax(dim=-1)),
        # ('dropout', nn.Dropout(drop_rate)),
        # ('linear', nn.Linear(in_feature, out_feature)),
        # ('batchnorm', nn.BatchNorm1d(out_feature)),
        # ('act2', nn.Softmax(dim=-1))
        # recon = self.de(z)          # reconstructed distribution over vocabulary
        
        decoded1_ac = self.de_ac1(z)
        decoded1_dr = self.de_dr(decoded1_ac)
        decoded2 = self.de_fc(decoded1_dr)
        decoded2_bn = self.de_bn(decoded2)
        decoded2_ac = self.de_ac2(decoded2_bn)
        recon = decoded2_ac          # reconstructed distribution over vocabulary
        return recon
    
    def forward(self, input_, compute_loss=False, avg_loss=True):
        # compute posterior
        en2, posterior_mean, posterior_logvar = self.encode(input_) 
        posterior_var    = posterior_logvar.exp()
        
        recon = self.decode(input_, posterior_mean, posterior_var)
        if compute_loss:
            return recon, self.loss(input_, recon, posterior_mean, posterior_logvar, posterior_var, avg_loss)
        else:
            return recon

    def loss(self, input_, recon, posterior_mean, posterior_logvar, posterior_var, avg=True):
        # NL
        NL  = -(input_ * (recon + 1e-10).log()).sum(1)
        # KLD, see Section 3.3 of Akash Srivastava and Charles Sutton, 2017, 
        # https://arxiv.org/pdf/1703.01488.pdf
        prior_mean   = self.prior_mean.expand_as(posterior_mean)
        prior_var    = self.prior_var.expand_as(posterior_mean)
        prior_logvar = self.prior_logvar.expand_as(posterior_mean)
        var_division    = posterior_var  / prior_var
        diff            = posterior_mean - prior_mean
        diff_term       = diff * diff / prior_var
        logvar_division = prior_logvar - posterior_logvar
        # put KLD together
        KLD = 0.5 * ( (var_division + diff_term + logvar_division).sum(1) - self.num_topic)
#         print ("KLD: ", KLD.size())
        
        # gamma
        n, _ = input_.size()
        gamma_mean, gamma_logvar = self.gamma()
        gamma_prior, gammar_prior_bin = self.gamma_prior
        input_t = (input_ > 0).unsqueeze(dim=-1)
        input_bin = ((gammar_prior_bin.expand(n, -1, -1) == 1) & input_t)
        lambda_c = 20.0
        
        gamma_prior = gamma_prior.expand(n, -1, -1)
        
#         print (gamma_prior.size())
#         print ("gamma_mean:", gamma_mean.size())
#         print ("input_bin.int()*gamma_mean:", (input_bin.int()*gamma_mean).size())
        
        
        GL = lambda_c * (((input_bin.int()*gamma_mean) - gamma_prior)**2).sum((1, 2))
#         GL = lambda_c * ((gamma_prior - gamma_mean)**2).sum((1))
#         print ("GL: ", GL.size())
        
        
        # loss
#         loss = (NL + KLD)
#         print (GL.mean())
#         loss = (NL + KLD + GL)
        
        # in traiming mode, return averaged loss. In testing mode, return individual loss
        if avg:
            return (NL + KLD + GL).mean()
        else:
            return (NL + KLD + GL)

## Train

In [69]:
from sklearn import metrics

In [70]:
def compute_accuracy(y_pred, y_true):
    accuracy = metrics.accuracy_score(y_true, y_pred)
    precision, recall, f1_score, _ = metrics.precision_recall_fscore_support(y_true=y_true, \
                                                     y_pred=y_pred, \
                                                     average=None)

    return (accuracy, precision, recall, f1_score)

In [71]:
model = ProdLDA(num_input, en1_units, en2_units, num_topic, drop_rate, init_mult, gamma_prior)
optimizer = torch.optim.Adam(model.parameters(), learning_rate, betas=(momentum, 0.999))

In [72]:
if torch.cuda.is_available():
    model = model.cuda()

In [73]:
for epoch in range(num_epoch):
    loss_epoch = 0.0
    model.train()                    # switch to training mode
    for input_, label_ in train_dl:
        recon, loss = model(input_, compute_loss=True)
        # optimize
        optimizer.zero_grad()        # clear previous gradients
        loss.backward()              # backprop
        optimizer.step()             # update parameters
        # report
        loss_epoch += loss.item()    # add loss to loss_epoch
    if epoch % 5 == 0:
        model.eval()
        # Test Model
        pred_test = []
        label_test = []
        
        pred_train = []
        label_train = []
        
        for x_train, y_train in train_dl:
            encoded, theta_mean, theta_logvar = model.encode(x_train)
            temp_theta_mean = theta_mean.argmax(-1).int().data.cpu().tolist()
            temp_y_train = y_train.argmax(-1).flatten().data.cpu().tolist()
            
            pred_train.extend(temp_theta_mean)
            label_train.extend(temp_y_train)
        
        accuracy_train, precision_train, recall_train, f1_score_train = compute_accuracy(pred_train, label_train)
        
        for x_test, y_test in test_dl:
            encoded, theta_mean, theta_logvar = model.encode(x_test)
            temp_theta_mean = theta_mean.argmax(-1).int().data.cpu().tolist()
            temp_y_test = y_test.argmax(-1).flatten().data.cpu().tolist()
            
            pred_test.extend(temp_theta_mean)
            label_test.extend(temp_y_test)
        
        accuracy_test, precision_test, recall_test, f1_score_test = compute_accuracy(pred_test, label_test)
        print ("##################################################")
        print('Epoch {}, loss={}, accuracy_train={}, accuracy_test={}'.format(epoch, loss_epoch / len(input_), accuracy_train, accuracy_test))
        for k in range(num_topic):
            print ("precision_train{}".format(k), "=" , "{:.9f}".format(precision_train[k]), \
                 "recall_train{}".format(k), "=" , "{:.9f}".format(recall_train[k]), \
                 "f1_score_train{}".format(k), "=" , "{:.9f}".format(f1_score_train[k]))
            print ("precision_te{}".format(k), "=" , "{:.9f}".format(precision_test[k]), \
                 "recall_te{}".format(k), "=" , "{:.9f}".format(recall_test[k]), \
                 "f1_score_te{}".format(k), "=" , "{:.9f}".format(f1_score_test[k]))
        emb = model.de_fc.weight.data.detach().cpu().numpy().T
        print_top_words(emb, vocab, 50)
        print_perp(model)
        print ("##################################################")        

  _warn_prf(average, modifier, msg_start, len(result))


##################################################
Epoch 0, loss=199.39399157072367, accuracy_train=0.3531502423263328, accuracy_test=0.3808139534883721
precision_train0 = 0.346178968 recall_train0 = 1.000000000 f1_score_train0 = 0.514313440
precision_te0 = 0.364179104 recall_te0 = 1.000000000 f1_score_te0 = 0.533916849
precision_train1 = 0.000000000 recall_train1 = 0.000000000 f1_score_train1 = 0.000000000
precision_te1 = 0.000000000 recall_te1 = 0.000000000 f1_score_te1 = 0.000000000
precision_train2 = 1.000000000 recall_train2 = 0.034591195 f1_score_train2 = 0.066869301
precision_te2 = 1.000000000 recall_te2 = 0.091836735 f1_score_te2 = 0.168224299
---------------Printing the Topics------------------
brown overwhelm greet use meant howev grass zimbabw museum encrust filet vinyl flight addit set occupi magnific mig recit hey sit air pizzeria boomer madison tie lucki incorrect bruschetta pie size translat asid brush pull grape fame fight sister deli husband caper lech fair detail mole

##################################################
Epoch 25, loss=197.4289640727796, accuracy_train=0.681421647819063, accuracy_test=0.6773255813953488
precision_train0 = 0.659090909 recall_train0 = 0.738679245 f1_score_train0 = 0.696619217
precision_te0 = 0.669291339 recall_te0 = 0.696721311 f1_score_te0 = 0.682730924
precision_train1 = 0.786924939 recall_train1 = 0.601295097 f1_score_train1 = 0.681699004
precision_te1 = 0.783505155 recall_te1 = 0.612903226 f1_score_te1 = 0.687782805
precision_train2 = 0.625346901 recall_train2 = 0.708595388 f1_score_train2 = 0.664373464
precision_te2 = 0.600000000 recall_te2 = 0.734693878 f1_score_te2 = 0.660550459
---------------Printing the Topics------------------
filet style howev hey air size chestnut quit bruschetta row greet fame giovanni plu fair brown grass use strip pizzeria crumbl expos appet coffe drinker set potato incorrect addit neighborhood york sit delici like soundtrack museum section meant visit creativ singl much typic brussel luc

##################################################
Epoch 50, loss=195.82055278577303, accuracy_train=0.7547657512116317, accuracy_test=0.7732558139534884
precision_train0 = 0.850828729 recall_train0 = 0.726415094 f1_score_train0 = 0.783715013
precision_te0 = 0.903225806 recall_te0 = 0.688524590 f1_score_te0 = 0.781395349
precision_train1 = 0.750208855 recall_train1 = 0.830712303 f1_score_train1 = 0.788410887
precision_te1 = 0.767605634 recall_te1 = 0.879032258 f1_score_te1 = 0.819548872
precision_train2 = 0.672708963 recall_train2 = 0.700209644 f1_score_train2 = 0.686183873
precision_te2 = 0.669724771 recall_te2 = 0.744897959 f1_score_te2 = 0.705314010
---------------Printing the Topics------------------
filet beef potato like style appet strip size bread quit delici portion plu giovanni hey fish cream sauc fame best brown infus neighborhood chestnut air tofu crumbl howev sprout averag snow lamb crispi typic york use row creativ pound oyster drinker raw addit pizzeria bean would soundt

##################################################
Epoch 75, loss=195.4190031352796, accuracy_train=0.7705977382875606, accuracy_test=0.8023255813953488
precision_train0 = 0.959390863 recall_train0 = 0.713207547 f1_score_train0 = 0.818181818
precision_te0 = 1.000000000 recall_te0 = 0.729508197 f1_score_te0 = 0.843601896
precision_train1 = 0.672334859 recall_train1 = 0.950971323 f1_score_train1 = 0.787739464
precision_te1 = 0.699421965 recall_te1 = 0.975806452 f1_score_te1 = 0.814814815
precision_train2 = 0.772493573 recall_train2 = 0.629979036 f1_score_train2 = 0.693995381
precision_te2 = 0.804878049 recall_te2 = 0.673469388 f1_score_te2 = 0.733333333
---------------Printing the Topics------------------
beef potato filet like sauc fish bread best style strip cream appet portion onion delici infus lamb veget bean size tast crispi tofu giovanni fresh quit snow plu fri pound clam oyster pate creativ raw brown fame hey tuna sprout averag delic chestnut crumbl neighborhood barbecu addit tan

The approximated perplexity is:  1.4604579861915228e+25
##################################################
##################################################
Epoch 100, loss=194.8698858963816, accuracy_train=0.7793214862681744, accuracy_test=0.7994186046511628
precision_train0 = 0.971649485 recall_train0 = 0.711320755 f1_score_train0 = 0.821350763
precision_te0 = 1.000000000 recall_te0 = 0.737704918 f1_score_te0 = 0.849056604
precision_train1 = 0.660126582 recall_train1 = 0.964847364 f1_score_train1 = 0.783915821
precision_te1 = 0.677777778 recall_te1 = 0.983870968 f1_score_te1 = 0.802631579
precision_train2 = 0.832205683 recall_train2 = 0.644654088 f1_score_train2 = 0.726520969
precision_te2 = 0.851351351 recall_te2 = 0.642857143 f1_score_te2 = 0.732558140
---------------Printing the Topics------------------
beef potato like sauc filet fish best onion cream tast bread veget strip bean style portion lamb infus appet tofu fresh crispi delici snow clam size giovanni fri creativ delic oys

##################################################
Epoch 125, loss=194.44520970394737, accuracy_train=0.789983844911147, accuracy_test=0.813953488372093
precision_train0 = 0.973417722 recall_train0 = 0.725471698 f1_score_train0 = 0.831351351
precision_te0 = 1.000000000 recall_te0 = 0.737704918 f1_score_te0 = 0.849056604
precision_train1 = 0.666030534 recall_train1 = 0.968547641 f1_score_train1 = 0.789295138
precision_te1 = 0.693181818 recall_te1 = 0.983870968 f1_score_te1 = 0.813333333
precision_train2 = 0.858117326 recall_train2 = 0.659329140 f1_score_train2 = 0.745702430
precision_te2 = 0.871794872 recall_te2 = 0.693877551 f1_score_te2 = 0.772727273
---------------Printing the Topics------------------
beef like potato filet sauc fish best tast onion cream veget bean bread tofu strip portion infus style lamb crispi fresh delic clam creativ snow oyster pork appet spinach pate side size ponzu duck giovanni delici fri shrimp tandoori raw pound soup barbecu vinegar sprout tomato tart fame

##################################################
Epoch 150, loss=194.15679481907895, accuracy_train=0.7987075928917609, accuracy_test=0.8255813953488372
precision_train0 = 0.976190476 recall_train0 = 0.734905660 f1_score_train0 = 0.838536060
precision_te0 = 1.000000000 recall_te0 = 0.762295082 f1_score_te0 = 0.865116279
precision_train1 = 0.673983215 recall_train1 = 0.965772433 f1_score_train1 = 0.793916350
precision_te1 = 0.697142857 recall_te1 = 0.983870968 f1_score_te1 = 0.816053512
precision_train2 = 0.867647059 recall_train2 = 0.680293501 f1_score_train2 = 0.762632197
precision_te2 = 0.907894737 recall_te2 = 0.704081633 f1_score_te2 = 0.793103448
---------------Printing the Topics------------------
beef like filet potato sauc tast onion fish best bean veget cream tofu infus strip portion bread lamb style crispi delic creativ oyster duck side ponzu spinach pork pate fresh snow clam size giovanni pound tandoori shrimp barbecu soup tomato tart appet fri sprout vinegar bowl turkey r

##################################################
Epoch 175, loss=193.8511988589638, accuracy_train=0.8003231017770598, accuracy_test=0.8226744186046512
precision_train0 = 0.978398983 recall_train0 = 0.726415094 f1_score_train0 = 0.833784515
precision_te0 = 1.000000000 recall_te0 = 0.737704918 f1_score_te0 = 0.849056604
precision_train1 = 0.678153446 recall_train1 = 0.964847364 f1_score_train1 = 0.796487209
precision_te1 = 0.694915254 recall_te1 = 0.991935484 f1_score_te1 = 0.817275748
precision_train2 = 0.862337662 recall_train2 = 0.696016771 f1_score_train2 = 0.770301624
precision_te2 = 0.909090909 recall_te2 = 0.714285714 f1_score_te2 = 0.800000000
---------------Printing the Topics------------------
beef like filet potato tast sauc onion bean tofu fish cream best veget infus strip portion crispi lamb pate creativ delic ponzu style duck bread oyster snow side spinach clam pork fresh barbecu tart pound tomato tandoori soup vinegar varieti sprout bowl giovanni squash brown remark raw

In [74]:
model.eval()
gamma_mean, gamma_logvar = model.gamma()
gm, gl = gamma_mean.data.cpu().numpy(), gamma_logvar.data.cpu().numpy()
print_gamma(gm, seed_words, vocab, vocab2id)

935 food 0 1 [0.24595784 0.3870817  0.36696047]
88 sauc 0 1 [0.31352574 0.42684886 0.2596254 ]
2681 chicken 0 1 [0.33490267 0.36519474 0.2999026 ]
2414 shrimp 0 1 [0.2880804  0.50886166 0.20305803]
1381 chees 0 1 [0.3767699  0.38622332 0.23700683]
1496 potato 0 0 [0.33675858 0.33401498 0.32922643]
105 fri 0 0 [0.3889548  0.30007634 0.31096888]
546 tomato 0 0 [0.39967844 0.30169657 0.29862502]
1347 roast 0 1 [0.33251235 0.38775107 0.2797366 ]
642 onion 0 1 [0.28705537 0.40573317 0.3072115 ]
2272 pork 0 0 [0.4894018  0.2893374  0.22126083]
872 goat 0 2 [0.33823565 0.3224413  0.339323  ]
1005 grill 0 0 [0.39769867 0.2891534  0.31314793]
124 tuna 0 0 [0.46757928 0.30141896 0.23100173]
1159 salad 0 0 [0.40836665 0.27846602 0.31316736]
2188 beef 0 2 [0.33822003 0.29866228 0.3631177 ]
601 tapa 0 2 [0.30566978 0.3176589  0.3766713 ]
1991 staff 1 1 [0.15459736 0.6057356  0.23966707]
1425 servic 1 1 [0.1352211  0.6547911  0.20998779]
1137 friendli 1 1 [0.21449628 0.5812381  0.2042656 ]
1009 rude

In [61]:
for epoch in range(num_epoch):
    loss_epoch = 0.0
    model.train()                    # switch to training mode
    for input_, label_ in train_dl:
        recon, loss = model(input_, compute_loss=True)
        # optimize
        optimizer.zero_grad()        # clear previous gradients
        loss.backward()              # backprop
        optimizer.step()             # update parameters
        # report
        loss_epoch += loss.item()    # add loss to loss_epoch
    if epoch % 5 == 0:
        model.eval()
        # Test Model
        pred_test = []
        label_test = []
        
        pred_train = []
        label_train = []
        
        for x_train, y_train in train_dl:
            encoded, theta_mean, theta_logvar = model.encode(x_train)
            temp_theta_mean = theta_mean.argmax(-1).int().data.cpu().tolist()
            temp_y_train = y_train.argmax(-1).flatten().data.cpu().tolist()
            
            pred_train.extend(temp_theta_mean)
            label_train.extend(temp_y_train)
        
        accuracy_train, precision_train, recall_train, f1_score_train = compute_accuracy(pred_train, label_train)
        
        for x_test, y_test in test_dl:
            encoded, theta_mean, theta_logvar = model.encode(x_test)
            temp_theta_mean = theta_mean.argmax(-1).int().data.cpu().tolist()
            temp_y_test = y_test.argmax(-1).flatten().data.cpu().tolist()
            
            pred_test.extend(temp_theta_mean)
            label_test.extend(temp_y_test)
        
        accuracy_test, precision_test, recall_test, f1_score_test = compute_accuracy(pred_test, label_test)
        print ("##################################################")
        print('Epoch {}, loss={}, accuracy_train={}, accuracy_test={}'.format(epoch, loss_epoch / len(input_), accuracy_train, accuracy_test))
        for k in range(num_topic):
            print ("precision_train{}".format(k), "=" , "{:.9f}".format(precision_train[k]), \
                 "recall_train{}".format(k), "=" , "{:.9f}".format(recall_train[k]), \
                 "f1_score_train{}".format(k), "=" , "{:.9f}".format(f1_score_train[k]))
            print ("precision_te{}".format(k), "=" , "{:.9f}".format(precision_test[k]), \
                 "recall_te{}".format(k), "=" , "{:.9f}".format(recall_test[k]), \
                 "f1_score_te{}".format(k), "=" , "{:.9f}".format(f1_score_test[k]))
        emb = model.de_fc.weight.data.detach().cpu().numpy().T
        print_top_words(emb, vocab, 50)
        print_perp(model)
        print ("##################################################")        

  _warn_prf(average, modifier, msg_start, len(result))


##################################################
Epoch 0, loss=199.15332673725328, accuracy_train=0.3150242326332795, accuracy_test=0.29651162790697677
precision_train0 = 0.884615385 recall_train0 = 0.021698113 f1_score_train0 = 0.042357274
precision_te0 = 0.800000000 recall_te0 = 0.032786885 f1_score_te0 = 0.062992126
precision_train1 = 0.000000000 recall_train1 = 0.000000000 f1_score_train1 = 0.000000000
precision_te1 = 0.000000000 recall_te1 = 0.000000000 f1_score_te1 = 0.000000000
precision_train2 = 0.310198762 recall_train2 = 0.997903564 f1_score_train2 = 0.473278648
precision_te2 = 0.289085546 recall_te2 = 1.000000000 f1_score_te2 = 0.448512586
---------------Printing the Topics------------------
knock toast modern rose favourit nightli black vintag bubbl thousand linen impecc bead even duck anywher cow watercress heel jean magic begin piquant cafeteria soar thrown ramen leav piano quit lob stripe prompt glow fantast craft present forev ning somehow brac bud ahead held brasseri

##################################################
Epoch 25, loss=197.54257940995066, accuracy_train=0.6817447495961227, accuracy_test=0.6656976744186046
precision_train0 = 0.795841210 recall_train0 = 0.794339623 f1_score_train0 = 0.795089707
precision_te0 = 0.808695652 recall_te0 = 0.762295082 f1_score_te0 = 0.784810127
precision_train1 = 0.695238095 recall_train1 = 0.472710453 f1_score_train1 = 0.562775330
precision_te1 = 0.678571429 recall_te1 = 0.459677419 f1_score_te1 = 0.548076923
precision_train2 = 0.581413210 recall_train2 = 0.793501048 f1_score_train2 = 0.671099291
precision_te2 = 0.544827586 recall_te2 = 0.806122449 f1_score_te2 = 0.650205761
---------------Printing the Topics------------------
black duck wed rice pizza rose brasseri leav even brac lob felt simpl bubbl build raw crabcak arepa panini yet present wallet great havana begin regard select sunday swim quit bacon seem orlean lemon origin loaf thu udon money jean franchis enough world new bell toast nobu impecc music

##################################################
Epoch 50, loss=193.00463481702303, accuracy_train=0.711470113085622, accuracy_test=0.7034883720930233
precision_train0 = 0.834630350 recall_train0 = 0.809433962 f1_score_train0 = 0.821839080
precision_te0 = 0.850877193 recall_te0 = 0.795081967 f1_score_te0 = 0.822033898
precision_train1 = 0.745333333 recall_train1 = 0.517113784 f1_score_train1 = 0.610595303
precision_te1 = 0.752941176 recall_te1 = 0.516129032 f1_score_te1 = 0.612440191
precision_train2 = 0.596051632 recall_train2 = 0.822851153 f1_score_train2 = 0.691325407
precision_te2 = 0.558620690 recall_te2 = 0.826530612 f1_score_te2 = 0.666666667
---------------Printing the Topics------------------
rice black duck pizza simpl select salmon leav lemon onion loaf wed brasseri bacon chile miso favorit crabcak panini mustard bean great menu curri delici corn bubbl raw brac arepa udon rich brioch origin rose wallet melt whole present satay felt swim lob tomato build collard chees world

##################################################
Epoch 75, loss=191.96334678248354, accuracy_train=0.7408723747980613, accuracy_test=0.7325581395348837
precision_train0 = 0.864945382 recall_train0 = 0.821698113 f1_score_train0 = 0.842767296
precision_te0 = 0.886956522 recall_te0 = 0.836065574 f1_score_te0 = 0.860759494
precision_train1 = 0.769801980 recall_train1 = 0.575393154 f1_score_train1 = 0.658549497
precision_te1 = 0.777777778 recall_te1 = 0.564516129 f1_score_te1 = 0.654205607
precision_train2 = 0.625000000 recall_train2 = 0.838574423 f1_score_train2 = 0.716204118
precision_te2 = 0.575539568 recall_te2 = 0.816326531 f1_score_te2 = 0.675105485
---------------Printing the Topics------------------
rice black duck salmon pizza bean onion tomato select menu miso simpl lemon chees favorit curri bacon loaf chile mustard corn delici stuf rich leav panini rib brasseri wed veal marin crabcak satay melt classic arepa brioch dish whole shell raw bland shred bubbl foie includ udon veget f

KeyboardInterrupt: 

## Test

In [67]:
model.eval()
gamma_mean, gamma_logvar = model.gamma()
gm, gl = gamma_mean.data.cpu().numpy(), gamma_logvar.data.cpu().numpy()
print_gamma(gm, seed_words, vocab, vocab2id)

935 food 0 0 [9.9579155e-01 9.7569142e-04 3.2328062e-03]
88 sauc 0 0 [9.9990177e-01 9.7469456e-05 7.6048917e-07]
2681 chicken 0 0 [9.9934071e-01 5.7745550e-04 8.1792656e-05]
2414 shrimp 0 0 [9.9830592e-01 1.6782943e-03 1.5879226e-05]
1381 chees 0 0 [9.9708670e-01 9.7398215e-04 1.9394107e-03]
1496 potato 0 0 [9.9866140e-01 1.3378733e-03 7.2459022e-07]
105 fri 0 0 [9.9968326e-01 3.1641265e-04 3.5546523e-07]
546 tomato 0 0 [9.9890029e-01 1.0936097e-03 6.0495563e-06]
1347 roast 0 0 [9.9966240e-01 3.3393590e-04 3.6433955e-06]
642 onion 0 0 [9.9722326e-01 2.7326937e-03 4.4095090e-05]
2272 pork 0 0 [9.9362803e-01 6.3653225e-03 6.5935074e-06]
872 goat 0 0 [9.8141921e-01 1.8393096e-02 1.8771099e-04]
1005 grill 0 0 [0.98477244 0.0061522  0.00907536]
124 tuna 0 0 [0.887535   0.10492375 0.00754128]
1159 salad 0 0 [9.8114634e-01 1.8817443e-02 3.6244954e-05]
2188 beef 0 0 [9.9938369e-01 6.1500550e-04 1.3632506e-06]
601 tapa 0 0 [0.7866733  0.0673615  0.14596522]
1991 staff 1 1 [1.3457648e-03 9.98051

In [57]:
model.eval()
gamma_mean, gamma_logvar = model.gamma()
gm, gl = gamma_mean.data.cpu().numpy(), gamma_logvar.data.cpu().numpy()
print_gamma(gm, seed_words, vocab, vocab2id)

935 food 0 [0.38022685 0.31608686 0.30368632]
88 sauc 0 [0.37584203 0.3531254  0.27103254]
2681 chicken 0 [0.48940447 0.28026634 0.23032926]
2414 shrimp 0 [0.34572217 0.46510997 0.18916792]
1381 chees 0 [0.38389972 0.37722594 0.23887436]
1496 potato 0 [0.37942436 0.31294188 0.30763382]
105 fri 0 [0.5319056  0.30025738 0.16783702]
546 tomato 0 [0.3200615  0.355876   0.32406247]
1347 roast 0 [0.4098173  0.3364438  0.25373888]
642 onion 0 [0.38030115 0.35574648 0.26395237]
2272 pork 0 [0.35135072 0.4279261  0.22072314]
872 goat 0 [0.41037723 0.32486367 0.2647591 ]
1005 grill 0 [0.4381058  0.30603322 0.25586092]
124 tuna 0 [0.43908802 0.16241035 0.3985017 ]
1159 salad 0 [0.5343841  0.20409346 0.26152253]
2188 beef 0 [0.2971067  0.32509032 0.37780294]
601 tapa 0 [0.386902   0.31338027 0.2997177 ]
1991 staff 1 [0.16195919 0.67375106 0.16428977]
1425 servic 1 [0.16554125 0.68071365 0.15374507]
1137 friendli 1 [0.1578267  0.64623207 0.19594115]
1009 rude 1 [0.17415586 0.64637    0.17947413]
45

In [None]:
emb = model.de_fc.weight.data.cpu().numpy().T
print_top_words(emb, vocab, 50)
print_perp(model)