In [1]:
import json
import nltk
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.distributions.normal as normal
import numpy as np
from scipy.stats import norm
import matplotlib.pyplot as plt

from torch.nn.utils import clip_grad_norm_
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from nltk.tokenize import word_tokenize
from nltk.tokenize import MWETokenizer

from gensim.models import Word2Vec
import pickle
from vae_util import Util
from vae import VAE

In [2]:
# Instantiate the project helper imported from vae_util; the saved output of
# this cell shows that constructing it prints "Loaded Util".
util = Util()

Loaded Util


In [3]:
def _load_pickle(path):
    """Return the object pickled at ``path``.

    The file is opened in a ``with`` block so the handle is closed as soon as
    the object has been read (a bare ``pickle.load(open(...))`` leaves the
    handle open until it happens to be garbage-collected).

    NOTE: ``pickle.load`` can execute arbitrary code while unpickling, so this
    must only be pointed at trusted files.
    """
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Saved histories from results-vae/: total loss, KL loss and MI.
yelp_total_epoch_losses_vae = _load_pickle("results-vae/yelp_total_epoch_losses_vae.pkl")
yelp_total_kl_losses_vae = _load_pickle("results-vae/yelp_total_kl_losses_vae.pkl")
yelp_total_mi_vae = _load_pickle("results-vae/yelp_total_mi_vae.pkl")

# The same three histories from the "val" files (presumably the validation split).
yelp_val_total_epoch_losses_vae = _load_pickle("results-vae/yelp_val_total_epoch_losses_vae.pkl")
yelp_val_total_kl_losses_vae = _load_pickle("results-vae/yelp_val_total_kl_losses_vae.pkl")
yelp_val_total_mi_vae = _load_pickle("results-vae/yelp_val_total_mi_vae.pkl")

In [4]:
# Echo the six stored histories, one per line: the three training ones first,
# then the three "val" ones.
for _history in (
    yelp_total_epoch_losses_vae,
    yelp_total_kl_losses_vae,
    yelp_total_mi_vae,
    yelp_val_total_epoch_losses_vae,
    yelp_val_total_kl_losses_vae,
    yelp_val_total_mi_vae,
):
    print(_history)

# All tensors and the model in this notebook are placed on the CPU.
device = torch.device("cpu")
# Binds `util` to a fresh Util instance (an earlier cell already created one).
util = Util()

# Pretrained word vectors with one all-zero row stacked underneath for <pad>.
embedding_size = 512
word2vec_model_name = "word2vec_yelp.model"
word2vec_yelp = Word2Vec.load(word2vec_model_name)
_pad_row = np.zeros((1, embedding_size))
embedding_weights = torch.tensor(
    np.vstack((word2vec_yelp.wv.vectors, _pad_row)),
    device=device,
)

# The padding index is set to the vocabulary size, i.e. the row position of
# the appended zero vector if `wv.vectors` has one row per vocabulary entry.
vocabulary_size = len(word2vec_yelp.wv.vocab)
padding_index = vocabulary_size

# Batch, model and run settings.
batch_size = 16
hidden_size = 1024
latent_size = 1
num_layers = 1
step = 0.25
learning_rate = 0.01
epochs = 2
max_sentence_length = 50


[788.98010263 639.25411682]
[ 7.93675241 25.53156433]
[2.73162553 2.86714189]
[919.79012532 851.97334471]
[ 9.12696004 28.56146727]
[2.77312319 2.73645301]
Loaded Util


In [5]:
# Build the VAE with the configured sizes and move it onto `device`.
# NOTE(review): synthetic=True is passed although the checkpoint below is the
# Yelp one; confirm this flag is intended.
model = VAE(hidden_size, num_layers, embedding_weights, latent_size, max_sentence_length, device, synthetic=True).to(device)
# map_location remaps every stored tensor onto `device`. Without it torch.load
# tries to restore tensors to the device they were saved from, which raises on
# a CPU-only machine when the checkpoint was written from a GPU.
model.load_state_dict(torch.load("results-vae/yelp-vae.pwf", map_location=device))
# Switch to evaluation mode; as the last expression of the cell this also
# displays the module summary.
model.eval()

VAE(
  (encoder): Encoder(
    (embed): Embedding(19840, 512)
    (lstm): LSTM(512, 1024, batch_first=True)
  )
  (stochastic_encoder): StochasticEncoder(
    (hidden_to_mean): Linear(in_features=2048, out_features=1, bias=True)
    (hidden_to_logvar): Linear(in_features=2048, out_features=1, bias=True)
  )
  (stochastic_decoder): StochasticDecoder(
    (latent_to_hidden): Linear(in_features=1, out_features=2048, bias=True)
  )
  (decoder): Decoder(
    (embed): Embedding(19840, 512)
    (lstm): LSTM(512, 1024, batch_first=True)
    (linear): Linear(in_features=1024, out_features=19840, bias=True)
  )
)

In [6]:
# Load the Yelp test file; load_data hands back two values, bound here as the
# original and the padded form of the data.
yelp_test_data_original, yelp_test_data_padded = util.load_data(
    "yelp_data/yelp.test.txt",
    max_sentence_length,
    with_labels=True,
)

In [7]:
# Convert the loaded test data into inputs, targets and lengths using the
# configured batch size and padding index.
# '_unk' is presumably the out-of-vocabulary token; confirm in Util.
(
    yelp_test_inputs,
    yelp_test_targets,
    yelp_test_lengths,
) = util.get_batches_text(
    yelp_test_data_original,
    yelp_test_data_padded,
    batch_size,
    padding_index,
    word2vec_yelp,
    '_unk',
)

In [8]:
# Only the first 10 entries (presumably batches) of each test sequence are
# evaluated, so the numbers describe that subset rather than the full test set.
_eval_subset = slice(None, 10)
loss, kl, ppl, mutual_info = util.test_vae(
    model,
    yelp_test_inputs[_eval_subset],
    yelp_test_targets[_eval_subset],
    yelp_test_lengths[_eval_subset],
    padding_index,
    max_sentence_length,
    device=device,
)

In [9]:
# Print the four values returned by util.test_vae on one line, space-separated.
# NOTE(review): the saved output shows ppl on the order of 1e84; check how
# Util.test_vae normalizes perplexity before reporting this figure.
print(loss, kl, ppl, mutual_info)

873.6742292404175 26.759928703308105 4.688080802596484e+84 2.7549392700195314
