In [1]:
import mxnet as mx
import numpy as np

# Configure the root logger (getLogger() with no name) at DEBUG level so that
# messages emitted through the `logging` module are not filtered out and show
# up in the notebook/console output.
import logging
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

from lstm import init_lstm, lstm_cell, get_lstm_init_states
from text_io import get_unified_vocab, text_2_indices

In [3]:
# Location of the pretrained checkpoint: it is loaded from the prefix
# '<params_dir>/<expt_name>' at epoch `last_iteration`.
params_dir = '../params'
expt_name  = 'july28'
last_iteration = 97
print('loading pretrained model %s/%s at epoch %d' % (params_dir, expt_name, last_iteration))
# Only the argument parameters (second return value) are used by the rest of
# the notebook. The other two return values are bound to descriptive throwaway
# names instead of `_` / `__`, because assigning to those names inside
# IPython/Jupyter shadows the interactive output-history variables.
_symbol, arg_params, _aux_params = mx.model.load_checkpoint('%s/%s' % (params_dir, expt_name), last_iteration)

loading pretrained model ../params/july28 at epoch 97


In [4]:
def lstm_inference_symbol(num_layer, num_hidden, num_labels, dropout=0.0):
    """Build the symbolic graph for ONE time step of the stacked LSTM.

    The graph embeds the input ``data`` with ``embed_weight``, feeds the
    embedding through ``num_layer`` LSTM cells (each layer consuming the ``h``
    output of the layer below) and exposes the updated states of every layer.

    Parameters
    ----------
    num_layer : int
        Number of stacked LSTM layers.
    num_hidden : int
        Passed to each ``lstm_cell`` and used as the embedding output size.
    num_labels : int
        Input dimension of the embedding (vocabulary size).
    dropout : float, optional
        Dropout value forwarded to ``lstm_cell`` for every layer except the
        first one, which always receives 0.0.

    Returns
    -------
    mx.sym.Group
        Grouped outputs ordered as ``[l0.h, l0.c, l1.h, l1.c, ...]``.
        Callers rely on this exact h-then-c, layer-by-layer ordering.
    """
    param_cells, last_states = init_lstm(num_layer)

    data = mx.sym.Variable('data')
    embed_weight = mx.sym.Variable("embed_weight")

    hidden = mx.sym.Embedding(data=data, input_dim=num_labels, weight=embed_weight,
                              output_dim=num_hidden, name='embed')

    # stack layers of LSTM for 1 sequence step
    for i in range(num_layer):
        next_state = lstm_cell(
            num_hidden,
            indata=hidden,
            prev_state=last_states[i],
            param=param_cells[i],
            seqidx=0,
            layeridx=i,
            dropout=0.0 if i == 0 else dropout
        )
        hidden = next_state.h
        last_states[i] = next_state

    # The previous version wrapped the top-layer `hidden` in an extra Dropout
    # here, but that symbol was never read again and never became part of the
    # returned group, so it was dead code and has been removed.

    outputs = []
    for state in last_states:
        # very important to be in this order (h first, then c)!
        outputs.append(state.h)
        outputs.append(state.c)

    return mx.sym.Group(outputs)

class LSTMInferenceModel(object):
    """Step-by-step inference wrapper around a pretrained stacked LSTM.

    A one-step LSTM graph (see ``lstm_inference_symbol``) is bound once with a
    batch size of 1. After every step the executor's output states are copied
    back into its ``l<i>_init_h`` / ``l<i>_init_c`` inputs, so consecutive
    calls to ``forward`` continue the same sequence. The class probabilities
    are computed from the last layer's ``h`` state with the pretrained
    ``cls_weight`` / ``cls_bias``.
    """

    def __init__(self, num_layer, num_hidden, num_labels, arg_params, ctx=mx.cpu(), dropout=0.0):
        """Bind the one-step graph and load the pretrained parameters.

        Parameters
        ----------
        num_layer : int
            Number of stacked LSTM layers.
        num_hidden : int
            Hidden/embedding size forwarded to the graph builder.
        num_labels : int
            Vocabulary size (embedding input dim and classifier output size).
        arg_params : dict
            Pretrained parameters by name; must contain ``'cls_weight'`` and
            ``'cls_bias'``. Any entry whose name matches an executor argument
            is copied into the executor.
        ctx : mx.Context, optional
            Context the executors are bound on.
        dropout : float, optional
            Forwarded to ``lstm_inference_symbol``.
        """
        self.sym = lstm_inference_symbol(num_layer, num_hidden, num_labels, dropout)
        self.num_labels = num_labels
        self.num_layer = num_layer
        self.ctx = ctx

        batch_size = 1
        init_states = get_lstm_init_states(num_layer, num_hidden, batch_size)
        data_shape = [("data", (batch_size, ))]

        input_shapes = dict(init_states + data_shape)
        self.executor = self.sym.simple_bind(ctx=ctx, **input_shapes)

        # copy the pretrained parameters over to the executor
        for key in self.executor.arg_dict.keys():
            if key in arg_params:
                arg_params[key].copyto(self.executor.arg_dict[key])

        state_name = []
        for i in range(num_layer):
            # very important to be in this order: it has to match the
            # (h, c) per-layer ordering of the grouped graph outputs
            state_name.append("l%d_init_h" % i)
            state_name.append("l%d_init_c" % i)

        # maps each init-state input name to the output that holds its next value
        self.states_dict = dict(zip(state_name, self.executor.outputs))

        # Name of the top layer's hidden state. Derived from num_layer instead
        # of the previously hard-coded 'l2_init_h', which only worked for
        # exactly three layers.
        self.last_h_name = "l%d_init_h" % (num_layer - 1)

        self.cls_weight = arg_params['cls_weight']
        self.cls_bias   = arg_params['cls_bias']

        # The classifier graph does not depend on the input values, so it is
        # built once here rather than on every predict() call.
        self._pred_sym = self._build_pred_symbol()

    def _build_pred_symbol(self):
        """Return the FullyConnected + SoftmaxOutput symbol used by predict()."""
        data       = mx.sym.Variable('data')
        cls_weight = mx.sym.Variable("cls_weight")
        cls_bias   = mx.sym.Variable("cls_bias")

        pred = mx.sym.FullyConnected(
            data       = data,
            num_hidden = self.num_labels,
            weight     = cls_weight,
            bias       = cls_bias,
            name       = 'pred'
        )

        return mx.sym.SoftmaxOutput(
            data = pred,
            name = 'softmax'
        )

    def predict(self, x):
        """Run the classifier on hidden state ``x`` and return the softmax output.

        Parameters
        ----------
        x : mx.nd.NDArray
            Bound as the ``data`` input of the classifier graph.

        Returns
        -------
        numpy.ndarray
            The softmax output with singleton dimensions squeezed out.
        """
        executor = self._pred_sym.bind(ctx=self.ctx, args={
            'data': x,
            'cls_weight': self.cls_weight,
            'cls_bias'  : self.cls_bias,
            # dummy label, only there because SoftmaxOutput requires the input
            # NOTE(review): created on the default context - confirm this is
            # acceptable when ctx is not the CPU.
            'softmax_label': mx.nd.array([0])
        })

        executor.forward()
        prob = np.squeeze(executor.outputs[0].asnumpy())
        return prob

    def forward(self, input_data, new_seq=False):
        """Feed ``input_data`` through the LSTM one element at a time.

        Parameters
        ----------
        input_data : iterable
            Sequence of inputs; each element is wrapped in a one-element list
            and copied into the executor's ``data`` argument, which is bound
            with shape ``(1,)``.
        new_seq : bool, optional
            When true, all recurrent states are reset to 0.0 before the first
            element is processed; otherwise the states left by the previous
            call are reused.

        Returns
        -------
        numpy.ndarray
            ``predict`` applied to the last layer's hidden state after the
            final element.
        """
        if new_seq:
            # reset the initial states to 0.0
            for key in self.states_dict.keys():
                self.executor.arg_dict[key][:] = 0.0

        for x in input_data:
            y = mx.nd.array([x])  # wrap in [] so the shape becomes (1,)
            y.copyto(self.executor.arg_dict["data"])
            self.executor.forward()  # move forward one step...
            for key in self.states_dict.keys():
                # feed the new h and c back in as the init states of the next step
                self.states_dict[key].copyto(self.executor.arg_dict[key])

        return self.predict(self.states_dict[self.last_h_name])

In [5]:
# Inspect the pretrained embedding matrix. Its second dimension is reused as
# num_hidden when the model is built further down, and its first dimension can
# be compared against the vocabulary size printed below.
arg_params['embed_weight'].shape

(42569, 200)

In [7]:
# Build the word <-> index lookups from the two training files.
# NOTE(review): the meaning of the third argument (80) is defined by
# text_io.get_unified_vocab, which is not visible here - confirm before
# changing it.
train_en_file = '../data/train.en'
train_ru_file = '../data/train.ru'
word2idx, idx2word = get_unified_vocab(train_en_file, train_ru_file, 80)

In [8]:
# Vocabulary size; passed to the model as num_labels. The recorded output
# (42569) equals the first dimension of the pretrained embed_weight shown above.
num_labels = len(idx2word)
print(num_labels)

42569


In [9]:
# Instantiate the inference model from the pretrained parameters.
# num_layers is set by hand - NOTE(review): presumably it has to match the
# number of layers stored in the checkpoint; confirm against the training run.
num_layers = 3
# hidden size is read from the second dimension of the embedding matrix
num_hidden = arg_params['embed_weight'].shape[1]
model = LSTMInferenceModel(num_layers, num_hidden, num_labels, arg_params)

In [10]:
def get_word(prob, idx2word, sample=True):
    """Pick a word index from a score vector and look the word up.

    Parameters
    ----------
    prob : array-like
        Non-negative scores, one per vocabulary entry. They do not need to be
        normalised.
    idx2word : sequence or mapping
        Lookup from index to word.
    sample : bool, optional
        If true, draw the index at random in proportion to ``prob``
        (inverse-CDF sampling); otherwise take the arg-max.

    Returns
    -------
    (idx, word)
        The chosen index and ``idx2word[idx]``.
    """
    if sample:
        cdf = np.cumsum(prob)
        # Normalise by the last cumulative value rather than by np.sum(prob).
        # The two can differ by rounding (notably for float32 input); if the
        # last CDF entry then falls below the random draw, the comparison is
        # all-False and argmax silently returns index 0. Dividing by cdf[-1]
        # makes the last entry exactly 1.0, which always exceeds a draw from
        # [0, 1).
        cdf = cdf / cdf[-1]
        idx = np.argmax(np.random.rand(1) < cdf)
    else:
        idx = np.argmax(prob)
    return idx, idx2word[idx]

In [11]:
def translate(text, model2, idx2word, reverse=True, sample=True, max_words=1000):
    """Generate output words for ``text`` with the given inference model.

    The text is converted to indices, fed through the model with fresh states,
    and then words are generated one at a time (each generated index is fed
    back as the next input) until the ``<EOS>`` index is produced.

    Relies on the notebook-level ``word2idx`` mapping.

    Parameters
    ----------
    text : str
        Input sentence.
    model2 : object
        Model exposing ``forward(data, new_seq=False)`` that returns a score
        vector over the vocabulary.
    idx2word : sequence or mapping
        Lookup from index to word.
    reverse : bool, optional
        If true, reverse the order of all input indices except the last one.
    sample : bool, optional
        Forwarded to ``get_word`` (random sampling vs. arg-max).
    max_words : int or None, optional
        Safety cap on the number of generated words, so that a model that
        never emits ``<EOS>`` (e.g. greedy decoding stuck in a cycle) cannot
        loop forever. Pass ``None`` to disable the cap.

    Returns
    -------
    str
        The generated words joined by single spaces.
    """
    data = text_2_indices(word2idx, text)
    if reverse:
        # keep the final element in place and flip everything before it
        data[:-1] = np.flipud(data[:-1])
    eos_idx = word2idx['<EOS>']

    words = []
    prob = model2.forward(data, new_seq=True)
    idx, word = get_word(prob, idx2word, sample)
    while idx != eos_idx and (max_words is None or len(words) < max_words):
        words.append(word)
        prob = model2.forward(np.array([idx]))
        idx, word = get_word(prob, idx2word, sample)

    return ' '.join(words).strip()

In [12]:
# Show how the example sentence is tokenised: print every index next to the
# vocabulary entry it maps back to.
X = text_2_indices(word2idx, '$10,000 Gold?')
for x in X:
    print(x, ':', idx2word[x])

2 : $
33 : 10
11 : ,
24 : 000
6310 : gold
400 : ?
42567 : <EOS>


In [13]:
# Translate the example sentence. With sample=True the words are drawn at
# random via np.random, and no seed is set in the cells shown here, so the
# output changes from run to run.
translate('$10,000 Gold?', model, idx2word, sample=True)

'тяжелое положение китая также приведет к идеям участия соседней <UNK> , наряду с ее акцентом на борьбу <UNK> к согласованной эпидемий <UNK> промышленности для того , чтобы принимать решительность в том , сколько государства полностью <UNK> с <UNK> <UNK> <UNK> . но без разрешения такого разрешения поведения ценностей , <UNK> к их собственным обязательствам , вызывают гораздо обвинения для <UNK> соседа , нежели построение политических исследований , таких как культура <UNK> спорта на жизнью 1907 <UNK> и акции могут <UNK> свое внимание от определенных <UNK> химического животных ежегодно .'