In [1]:
import dynet as dy
import numpy as np
from time import time
import random, os

In [None]:
class FeedForward:
    """One-hidden-layer feed-forward network: ``W2 * act(W1 * x + b1) + b2``.

    The four parameters live in a dedicated subcollection of the parameter
    collection handed to the constructor; :meth:`param_collection` returns
    that subcollection.
    """

    def __init__(self, model, num_input, num_hidden, num_out, act):
        """Create the layer parameters.

        Args:
            model: Parent parameter collection; ``add_subcollection()`` is
                called on it once.
            num_input: Size of the input vector.
            num_hidden: Size of the hidden layer.
            num_out: Size of the output vector.
            act: Callable applied to the hidden pre-activation expression.
        """
        pc = model.add_subcollection()
        # Register on the subcollection rather than on `model`; otherwise
        # param_collection() would return a collection holding no parameters.
        self.W1 = pc.add_parameters((num_hidden, num_input))
        self.W2 = pc.add_parameters((num_out, num_hidden))
        # Trailing commas make the bias shapes real 1-tuples.
        self.b1 = pc.add_parameters((num_hidden,))
        self.b2 = pc.add_parameters((num_out,))
        self.pc = pc
        self.act = act
        # Everything from_spec() needs to rebuild an identically shaped layer.
        self.spec = (num_input, num_hidden, num_out, act)

    def __call__(self, input_exp):
        """Apply the network to ``input_exp`` and return the output expression."""
        W1 = self.W1.expr()
        W2 = self.W2.expr()
        b1 = self.b1.expr()
        b2 = self.b2.expr()

        hidden = self.act(W1 * input_exp + b1)
        return W2 * hidden + b2

    def param_collection(self):
        """Return the subcollection that owns this layer's parameters."""
        return self.pc

    @staticmethod
    def from_spec(spec, model):
        """Build a new layer inside ``model`` from a ``spec`` tuple.

        Args:
            spec: ``(num_input, num_hidden, num_out, act)`` as stored in
                ``self.spec``.
            model: Parent parameter collection for the new layer.
        """
        num_input, num_hidden, num_out, act = spec
        return FeedForward(model, num_input, num_hidden, num_out, act)

In [3]:
class Shortlister:
    """Char/word BiLSTM utterance encoder followed by a feed-forward skill scorer.

    Each word is represented by the final outputs of a forward and a backward
    character LSTM concatenated with a word embedding; the word sequence is
    then run through a forward and a backward word LSTM.  Unless the model is
    non-personalized, an attention-weighted summary of the datum's enabled
    skills is concatenated to the utterance encoding before scoring.

    ``model`` is a plain public attribute holding the parameter collection.
    (It used to be shadowed by a getter-only property that returned
    ``self.model``, which blocks the assignment in ``__init__`` and recurses
    on read for new-style classes.)
    """

    def __init__(self, context, non_personalized=False):
        """Create every parameter, then populate them from the saved model file.

        Args:
            context: Object exposing ``opts`` (option mapping), the
                ``char_vocab`` / ``word_vocab`` / ``skill_vocab`` vocabularies
                (``size()`` and ``index_of()`` are used) and the
                ``char_count`` / ``word_count`` / ``skill_count`` containers
                that :meth:`get_emb` tests indices against.
            non_personalized: When truthy, the key ``'non_personalized'`` is
                written into ``context.opts`` (the mapping is shared with the
                context, not copied).

        Raises:
            AssertionError: If ``opts['act']`` is not one of ``'relu'``,
                ``'selu'`` or ``'tanh'``.
        """
        self.model = dy.ParameterCollection()
        model = self.model

        self.opts = context.opts
        self.char_vocab = context.char_vocab
        self.word_vocab = context.word_vocab
        self.skill_vocab = context.skill_vocab

        self.char_count = context.char_count
        self.word_count = context.word_count
        self.skill_count = context.skill_count

        cdim = self.opts['dim_char_embeddings']
        wdim = self.opts['dim_word_embeddings']
        ldim = self.opts['dim_lstm_outputs']
        self.dim_lstm_outputs = ldim

        # NOTE: parameters are created in a fixed order (lookups, LSTMs,
        # feed-forward, scale); keep it stable so load() keeps matching
        # previously saved models.
        #
        # "+1": row 0 of every lookup table is reserved for unseen types and
        # known index i is stored at row i + 1 (see get_emb).
        self.char_lookup = model.add_lookup_parameters((self.char_vocab.size() + 1, cdim))
        self.word_lookup = model.add_lookup_parameters((self.word_vocab.size() + 1, wdim))
        # "* 2": skill embeddings are dot-multiplied with the utterance
        # encoding, which concatenates the forward and backward word-LSTM
        # outputs (ldim each), so both must have 2 * ldim entries.
        self.skill_lookup = model.add_lookup_parameters((self.skill_vocab.size() + 1, ldim * 2))

        # Word-LSTM input = char-fwd output + char-bwd output + word embedding.
        dim_wlstm_input = 2 * cdim + wdim
        dim_wlstm_output = ldim

        # LSTM parameters
        self.char_fwd_lstm = dy.CoupledLSTMBuilder(1, cdim, cdim, model)
        self.char_bwd_lstm = dy.CoupledLSTMBuilder(1, cdim, cdim, model)
        self.word_fwd_lstm = dy.CoupledLSTMBuilder(1, dim_wlstm_input, dim_wlstm_output, model)
        self.word_bwd_lstm = dy.CoupledLSTMBuilder(1, dim_wlstm_input, dim_wlstm_output, model)

        # FeedForward parameters
        dim_ff_input = 2 * dim_wlstm_output
        dim_ff_output = self.skill_vocab.size()

        act_name = self.opts['act']
        if act_name == 'relu':
            act = dy.rectify
        elif act_name == 'selu':
            act = dy.selu
        elif act_name == 'tanh':
            act = dy.tanh
        else:
            # Raised explicitly (instead of `assert False`) so the check
            # survives `python -O`; the exception type is unchanged.
            raise AssertionError("not supported activation function")

        if non_personalized:
            self.opts['non_personalized'] = True

        # Only the *presence* of the key matters, not its value.
        personalized = 'non_personalized' not in self.opts

        # Personalized models feed [utterance; skill summary], doubling the
        # feed-forward input (and hidden) width.
        ff_size_multiple = 2 if personalized else 1
        ff_width = ff_size_multiple * dim_ff_input
        self.feed_forward = FeedForward(model, ff_width, ff_width, dim_ff_output, act)

        # Per-skill scale for the skill-enablement bias (personalized only).
        self.scale = model.add_parameters((dim_ff_output)) if personalized else None
        self.load()

    def get_emb(self, cnts, lookup, idx):
        """Return the embedding for ``idx`` from ``lookup``.

        Row 0 is the shared embedding for unseen types: it is returned when
        ``idx`` is not in ``cnts``.  Otherwise row ``idx + 1`` is returned.
        """
        if idx not in cnts:
            return lookup[0]
        return lookup[idx + 1]

    def compute_skill_summary(self, datum, utterance_expr):
        """Summarize ``datum.enabled_skills_idx`` as a single expression.

        Returns zeros when no skill is enabled, the skill's own embedding
        when exactly one is enabled, and otherwise the sum of the skill
        embeddings weighted by a softmax over their dot products with
        ``utterance_expr``.
        """
        enabled = datum.enabled_skills_idx

        if len(enabled) == 0:
            return dy.zeroes((2 * self.dim_lstm_outputs, 1))

        skill_embs = [
            self.get_emb(self.skill_count, self.skill_lookup, s) for s in enabled
        ]
        if len(skill_embs) == 1:
            return skill_embs[0]

        att_weights = [dy.dot_product(utterance_expr, emb) for emb in skill_embs]
        normalized_weights = dy.softmax(dy.concatenate(att_weights))
        return dy.esum(
            [emb * normalized_weights[i] for i, emb in enumerate(skill_embs)]
        )

    def disable_dropout(self):
        """Disable dropout on all four LSTM builders."""
        for lstm in (self.char_fwd_lstm, self.char_bwd_lstm,
                     self.word_fwd_lstm, self.word_bwd_lstm):
            lstm.disable_dropout()

    def build_graph(self, datum):
        """Build and return the (unnormalized) skill score expression.

        Args:
            datum: Object with ``words`` (iterable of words, each iterable
                over characters), ``enabled_skills_idx`` (personalized models)
                and ``skill_enablement`` (when ``opts['use_skill_bias']`` is
                True).
        """
        char_vocab = self.char_vocab
        word_vocab = self.word_vocab
        char_count = self.char_count
        word_count = self.word_count
        char_lookup = self.char_lookup
        word_lookup = self.word_lookup

        personalized = 'non_personalized' not in self.opts
        scale = dy.parameter(self.scale) if personalized else None

        wlstm_input_vec = []
        for word in datum.words:
            char_embs = [
                self.get_emb(char_count, char_lookup, char_vocab.index_of(c))
                for c in word
            ]
            c_fwd_outs = self.char_fwd_lstm.initial_state().transduce(char_embs)
            c_bwd_outs = self.char_bwd_lstm.initial_state().transduce(reversed(char_embs))
            w_emb = self.get_emb(word_count, word_lookup, word_vocab.index_of(word))
            # Final state of each direction summarizes the whole word.
            wlstm_input_vec.append(
                dy.concatenate([c_fwd_outs[-1], c_bwd_outs[-1], w_emb])
            )

        w_fwd_out = self.word_fwd_lstm.initial_state().transduce(wlstm_input_vec)
        w_bwd_out = self.word_bwd_lstm.initial_state().transduce(reversed(wlstm_input_vec))

        utterance_expr = dy.concatenate([w_fwd_out[-1], w_bwd_out[-1]])

        # Skill summary
        ff_input = utterance_expr
        if personalized:
            skill_summary_expr = self.compute_skill_summary(datum, utterance_expr)
            ff_input = dy.concatenate([utterance_expr, skill_summary_expr])

        scores = self.feed_forward(ff_input)

        # Compared against True explicitly, as before: only a value equal to
        # True switches the bias on.
        if self.opts.get('use_skill_bias', False) == True:
            # Skill bias: learned per-skill scale times the skill-enablement vector.
            scores += dy.cmult(scale, dy.inputVector(datum.skill_enablement))

        return scores

    def load(self):
        """Populate the parameters from ``<opts['model_path']>/model.bin``."""
        self.model.populate(self.opts['model_path'] + '/model.bin')

    def predict(self, datum):
        """Score ``datum`` and return ``(pred, scores)``.

        ``pred`` is the argmax index over the softmax probabilities and
        ``scores`` is the softmax expression itself.

        NOTE(review): the computation graph is not renewed here; presumably
        the caller does that between examples - confirm.
        """
        scores = dy.softmax(self.build_graph(datum))
        probs = scores.npvalue()
        pred = np.argmax(probs)
        return pred, scores