In [53]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.nn.utils.rnn import pack_padded_sequence as pack

import os
import numpy as np
# import pandas as pd
import pickle
import random
from tqdm import tqdm
# from collections import defaultdict

# import matplotlib.pyplot as plt
# import matplotlib.gridspec as gridspec
# import seaborn as sns

from AttMem2Seq import AttMem2Seq
from InformationCenter import InformationCenter
# from InformationCenter_sample_train import InformationCenter
from LoggingModule import LoggingModule

#import gc #;gc.collect() # garbage collector 
import sys
# raise NotImplementedError

# One seed shared by every random number generator the notebook touches,
# so changing it in a single place re-seeds the whole run.
SEED = 12
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)  # also seed every other visible GPU, not just the current one
np.random.seed(SEED)
random.seed(SEED)
# Request deterministic cuDNN kernels and keep the autotuner off: with
# benchmark enabled cuDNN may pick a different algorithm from run to run.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [54]:
# MODEL DECLARATION
# The information center is created first because the model constructor receives it.
IC = InformationCenter()
model = AttMem2Seq(information_center=IC)
# Move the model to the GPU only when CUDA is actually available.
if torch.cuda.is_available():
    model.cuda()

model_name :  AttMem2Seq_v5.3_Ptr&Mem2AttGate

voc_size : 19549
num usr : 19675
num_prd : 80256
num_rating : 5
size of train data : 655934
size of dev   data : 93703
size of test  data : 187396

emb_size: 512
hidden_size: 512
latent_size: 64
lstm_num_layers: 2
dropout: 0.2
max_gen_length: 70
num_memory: 10

batch size: 50
maximum epoch: 100
check step: 500
evaluation step: 10000
gradient clipping: 5
initial learning rate: 0.002
learning rate decaying: 0.97
decay after: 10 epoch
smoothing constant: 0.95

word distribution stablizing epsilon: 1e-06
			
************************** Additional Note **************************
			
			dropout only for lstm and meta embedding

			gradient clip : 3

	(AttMem2Seq.py) : no drop out for all parameters
	
	epsilon plus after pack prediction
	(you should change loss calculation in main.py and eval_nll from AttMem2Seq.py)
*********************************************************************
			
  copy model python file to debug folder 




In [55]:
# Deserialize onto the CPU so a checkpoint saved from a GPU run also opens on a
# machine without CUDA; load_state_dict then copies each tensor into the
# model's existing parameters, wherever those live.
checkpoint_path = os.path.join(IC.param_dir, IC.model_name+'.pth')
checkpoint = torch.load(checkpoint_path, map_location='cpu')
model.load_state_dict(checkpoint['state_dict'])

In [56]:
# Switch to inference mode so the Dropout layers listed in the repr below are disabled.
model.eval()

AttMem2Seq(
  (Encoder): Encoder(
    (word_embed): Embedding(19549, 512, padding_idx=0)
    (attribute_encoder): AttributeEncoder(
      (user_emb): MetaEmbedding(
        (meta_emb): Embedding(19675, 64)
        (dropout): Dropout(p=0.2)
      )
      (product_emb): MetaEmbedding(
        (meta_emb): Embedding(80256, 64)
        (dropout): Dropout(p=0.2)
      )
      (rating_emb): MetaEmbedding(
        (meta_emb): Embedding(5, 64)
        (dropout): Dropout(p=0.2)
      )
      (H): Linear(in_features=192, out_features=2048, bias=True)
      (W): Linear(in_features=576, out_features=1, bias=True)
    )
    (memory_encoder): MemoryEncoder(
      (word_embed): Embedding(19549, 512, padding_idx=0)
      (LSTM): LSTM(512, 256, batch_first=True, bidirectional=True)
      (W): Linear(in_features=1024, out_features=512, bias=True)
      (z): Linear(in_features=512, out_features=1, bias=True)
    )
    (Ws): Linear(in_features=1600, out_features=1, bias=True)
  )
  (Decoder): Decoder(
    

In [101]:
def argmax_ij(a):
    """Return the (row, column) position of the largest entry of a 2-D array.

    Args:
        a: 2-D numpy array.

    Returns:
        Tuple ``(i, j)`` of plain Python ints such that ``a[i, j]`` is the
        maximum. On ties the first maximum in row-major order wins, which is
        the order ``np.argmax`` scans the flattened array in.
    """
    # np.argmax works on the flattened array; split the flat position into
    # row and column with integer arithmetic instead of float division.
    flat_index = int(np.argmax(a))
    return divmod(flat_index, a.shape[1])

In [287]:
def _flatten_memory(rows, lengths):
    """Concatenate the first ``lengths[j]`` items of every ``rows[j]`` along dim 0."""
    return torch.cat([rows[j][:lengths[j]] for j in range(len(lengths))], dim=0)


def beam_search(model, IC, id, user, product, rating, K=10):
    """Generate K candidate sentences for one test example with beam search.

    Args:
        model: network exposing the ``Encoder`` / ``Decoder`` sub-modules
            called below. It is expected to be in eval mode: the memory
            encoding is computed once and reused at every decoding step.
        IC: information center providing the test data, the per-example
            memory indices/scores, the vocabulary maps and the helpers
            ``split_data``, ``add_pad`` and ``to_var``.
        id: position of the example in ``IC.test_data`` and in the
            ``IC.memory_*_test_list`` lists. (The name shadows the builtin
            but is kept because callers pass it by keyword.)
        user, product, rating: attribute values of the example; ``user`` and
            ``product`` are checked against the entry stored at ``id``.
        K: beam width, i.e. the number of hypotheses kept per step.

    Returns:
        A list of K lists of word indices with ``<sos>``, ``<eos>`` and
        ``<pad>`` removed, ordered as the beams were selected at the last
        decoding step (highest accumulated log-probability first).
        Returns ``None`` after printing a message when ``id`` does not match
        the given ``user`` / ``product``.
    """
    # Validity Check
    (_, _, user_in_data, product_in_data, _, _, _, _) = IC.split_data(IC.test_data[id:id+1], sorting=True, padding=True)
    if user_in_data[0]!=user or product_in_data[0]!=product:
        print("inconsistent id, user, product tuple")
        return

    maxlen = IC.max_gen_length
    pad_idx = IC.word2idx['<pad>']
    sos_idx = int(IC.word2idx['<sos>'])
    eos_idx = IC.word2idx['<eos>']

    # Extract Memory: keep the IC.num_memory highest-scored training reviews
    memory_index = IC.memory_index_test_list[id]
    memory_score = IC.memory_score_test_list[id]
    argmax_index = np.argsort(-memory_score)
    sampled_index = memory_index[argmax_index[:IC.num_memory]]
    num_memory = len(sampled_index)
    # memory word and length
    w = IC.word_train[sampled_index].copy()
    l = IC.length_train[sampled_index].copy()
    memory_word = []
    memory_length = []
    for i in range(num_memory):
        memory_word.append(w[i][1:-1]) # remove <sos> <eos>
        memory_length.append(l[i]-2) # remove <sos> <eos>
    memory_word, memory_length = np.array(memory_word), np.array(memory_length)
    # sorting w.r.t. memory length (longest first)
    sorted_idx = np.argsort(-memory_length)
    memory_word = memory_word[sorted_idx]
    memory_length = memory_length[sorted_idx]
    # padding
    memory_word = np.array(IC.add_pad(memory_word, memory_length))
    # variable wrapping
    memory_word = IC.to_var(torch.from_numpy(memory_word.astype(np.int64)))

    # every beam shares the same attributes
    user, product, rating = np.array([user]*K), np.array([product]*K), np.array([rating]*K)

    # Generated Sentence Batch: column 0 holds <sos>, the rest starts as <pad>
    sentence_batch = np.full((K, maxlen+1), pad_idx, dtype=np.int64)
    word_idx = np.full(K, sos_idx, dtype=np.int64)
    sentence_batch[:, 0] = word_idx
    likelihood_batch = np.zeros((K), dtype=np.float64) # likelihood: log-probability

    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        # Initial Hidden State with meta information
        attributes = model.Encoder.attribute_encoder(user=user, product=product, rating=rating)
        decoder_hidden_init = model.Encoder.attribute_encoder.decoder_hidden_init(attributes=attributes)

        # Explicit memory representation. It depends only on the memory words,
        # not on the decoding step, so it is computed once up front.
        memory_hidden = model.Encoder.memory_encoder(word=memory_word, length=memory_length)
        # drop the padded tail of every memory and join them into one sequence
        memory_hidden_flat = _flatten_memory(memory_hidden, memory_length)
        # word ids of the same flattened positions, used as copy targets
        memory_word_padded = model.Encoder.memory_encoder.memory_word_padding(
            memory_word_flatten=[_flatten_memory(memory_word, memory_length)]*K,
            )

        for i in range(1, maxlen+1):
            # LSTM next step
            word_var = IC.to_var(torch.from_numpy(word_idx.astype(np.int64))).view(-1, 1)
            word_emb = model.Encoder.word_embedding(word_var)
            decoder_hidden, decoder_hidden_init = model.Decoder.LSTM(word_emb=word_emb, length=[1]*K, initial_hidden=decoder_hidden_init)
            # encoder attribute reduce using decoder hidden state
            attribute_reduced, _ = model.Encoder.attribute_encoder.reduce_upr_with_attention(
                    user_emb=attributes[:, 0, :],
                    product_emb=attributes[:, 1, :],
                    rating_emb=attributes[:, 2, :],
                    decoder_hidden=decoder_hidden)
            # decoder hidden with attribute
            hidden_with_attribute = model.Decoder.hidden_with_attribute(
                attribute_reduced=attribute_reduced,
                context=decoder_hidden)
            # reducing memory using attention with hidden_with_attribute;
            # every beam attends over the same flattened memory
            memory_reduced, memory_attention = model.Encoder.memory_encoder.reduce_memory_with_attention(
                memory_hidden=[memory_hidden_flat]*K,
                query_batch=hidden_with_attribute,
                length=[1]*K)
            # decoder hidden with memory
            hidden_with_memory = model.Decoder.hidden_with_memory(
                memory_reduced=memory_reduced,
                context=decoder_hidden)

            # both gates read the same concatenated context
            gate_context = torch.cat([
                attribute_reduced,
                memory_reduced,
                decoder_hidden,
                word_emb,
                ], dim=2)
            # memory or attribute? explicit(specific) or implicit(general)?
            memory_gate = model.Encoder.memory_or_attribute(context=gate_context)
            hidden_final = (memory_gate*hidden_with_memory) + (1-memory_gate)*hidden_with_attribute
            # predicted word distribution
            word_distribution = F.softmax(model.Decoder.word_predict(hidden_final), dim=2)
            # copying mechanism: split the probability mass between generating
            # from the vocabulary and copying a memory word
            copy_gate = model.Decoder.copy_gate(context=gate_context)
            word_distribution = (1-copy_gate) * word_distribution
            memory_attention = copy_gate * memory_attention
            # add the copy mass of every memory position to its word id
            copy_index = memory_word_padded.unsqueeze(dim=1).repeat(1, memory_attention.size(1), 1)
            word_distribution = word_distribution.scatter_add_(2, copy_index, memory_attention)

            # one decoding step per call, so take position 0 of the step axis
            scores = word_distribution.cpu().data.numpy()[:,0,:]

            # Turn probabilities into accumulated log-probabilities, in place.
            # A finished beam keeps its score in every column so it can be
            # re-selected without being extended.
            with np.errstate(divide='ignore'): # log(0) -> -inf is intended
                for ibest in range(K):
                    if sentence_batch[ibest, i-1]==eos_idx:
                        scores[ibest] = likelihood_batch[ibest]
                    else:
                        scores[ibest] = likelihood_batch[ibest]+np.log(scores[ibest])

            # choose k best (parent beam, next word) pairs, best first
            parents = np.zeros((K), dtype=np.int64)
            best_word_idx = np.zeros((K), dtype=np.int64)
            for ii in range(K):
                i_, j_ = argmax_ij(scores)
                parents[ii] = i_
                if sentence_batch[i_, i-1] == eos_idx:
                    # finished beam: carry it over once, then retire its row
                    best_word_idx[ii] = eos_idx
                    likelihood_batch[ii] = scores[i_, 0]
                    scores[i_, :] = -np.inf
                else:
                    best_word_idx[ii] = j_
                    likelihood_batch[ii] = scores[i_, j_]
                    if i==1:
                        # all beams are identical at the first step, so mask
                        # the word in every row to avoid duplicate hypotheses
                        scores[:, j_] = -np.inf
                    else:
                        scores[i_, j_] = -np.inf

            # re-order sentences and LSTM states to follow the selected parents
            best_sentence = sentence_batch[parents]
            best_sentence[:, i] = best_word_idx
            parent_sel = torch.from_numpy(parents)
            h_init, c_init = decoder_hidden_init
            h_init = h_init.cpu().data.index_select(1, parent_sel)
            c_init = c_init.cpu().data.index_select(1, parent_sel)
            decoder_hidden_init = (IC.to_var(h_init), IC.to_var(c_init))
            word_idx = best_word_idx
            sentence_batch = best_sentence

            # All batch is ended -- Terminate Condition
            if (sentence_batch[:, i]==eos_idx).all(): break

    sentences = []
    for beam in sentence_batch[:, 1:]: # remove <sos> symbol
        beam = beam[beam!=eos_idx] # remove <eos> symbol
        beam = beam[beam!=pad_idx] # positions never written stay <pad>
        sentences.append(list(beam))

    return sentences

In [292]:
# np.random.seed(12)
# random.seed(12)
# np.random.randint excludes its upper bound, so this draws from [0, IC.num_test).
# NOTE(review): assumes IC.num_test is the number of rows in IC.test_data — confirm.
sample_index = np.random.randint(0, IC.num_test)
# Pinned example for the run shown below; delete this line to use the random draw.
sample_index = 14
print("sample_index: ",sample_index)
# Slice a single-row batch so split_data returns per-example arrays.
(word, length,
user, product, rating,
memory_for, memory_word, memory_length) = IC.split_data(IC.test_data[sample_index:sample_index+1], sorting=True, padding=True)

sample_index:  14


In [293]:
# Run beam search with 30 beams for the sampled test example.
result = beam_search(model, IC=IC, id=sample_index, user=user[0], product=product[0], rating=rating[0], K=30)
# Map every beam's word indices back to tokens, one space-joined string per beam.
sentences = []
for token_ids in result:
    sentences.append(" ".join(IC.idx2word[token_id] for token_id in token_ids))
# Print each candidate followed by a separator line.
for s in sentences:
    print(s, "*****************************************************", sep="\n")

i loved this book , could n't put it down ! ca n't wait for the next one to come out ! love it !
*****************************************************
i loved this book , could n't put it down ! ca n't wait for the next one to come out ! great writing !
*****************************************************
i loved this book , could n't put it down . ca n't wait for the next one to come out ! love it !
*****************************************************
i loved this book , could n't put it down , ca n't wait for the next one to come out ! great writing !
*****************************************************
i loved this book , could n't put it down ! ca n't wait for the next one to come out ! very well worth it !
*****************************************************
i loved this book , could n't put it down . ca n't wait for the next one to come out ! great writing !
*****************************************************
i loved this book , could n't put it down ! ca n't wait for the n

In [225]:
# NOTE(review): leftover debug probe — `decoder_hidden` is only assigned inside
# beam_search, so evaluating it at notebook scope raises the NameError shown below.
decoder_hidden

NameError: name 'decoder_hidden' is not defined

In [85]:
# Decode the beam at index 2 of `result` back into its word tokens.
[IC.idx2word[token_id] for token_id in result[2]]

['i',
 'was',
 "n't",
 'sure',
 'what',
 'to',
 'expect',
 'when',
 'i',
 'started',
 'this',
 'book',
 ',',
 'but',
 'i',
 'really',
 'enjoyed',
 'it',
 '.',
 'it',
 'was',
 'a',
 'good',
 'read',
 '.']