In [1]:
%matplotlib inline

import random
from allennlp.commands.elmo import ElmoEmbedder
import numpy as np
easy = False
if easy:
    import dynet as dy
else:
    import _dynet
    import sys 
    sys.argv.append('--dynet-devices')
    sys.argv.append('GPU:0')
    dyparams = _dynet.DynetParams()

    # Fetch the command line arguments (optional)
    dyparams.from_args()

    # Set some parameters manualy (see the command line arguments documentation)
    dyparams.set_mem(2048*4)
    dyparams.set_random_seed(666)
    # Initialize with the given parameters
    dyparams.init() # or init_from_params(dyparams)

    import dynet as dy

In [2]:
import pickle

(i2o,o2i,type_o2i,type_i2o) = pickle.load(open('vocab.pkl','rb'))


In [3]:
import sys
def is_number(s):
    try:
        float(s)
        return float(s)
    except ValueError:
        return 0.0

class CopyNetwork:
    def __init__(self, enc_layers, dec_layers, type_embeddings_size,embeddings_size, 
                 enc_state_size, dec_state_size,output_embedding_size, vocab_size, dropout = 0):
        self.model = dy.Model()
        self.type_embeddings = self.model.add_lookup_parameters((len(type_o2i), type_embeddings_size))
        self.output_embeddings = self.model.add_lookup_parameters((vocab_size, output_embedding_size))
       
        # the rnns
        self.ENC_RNN_F = dy.LSTMBuilder(enc_layers, 1+type_embeddings_size+embeddings_size, enc_state_size/2, self.model)
        self.ENC_RNN_F.set_dropout(dropout)
        self.ENC_RNN_B = dy.LSTMBuilder(enc_layers, 1+type_embeddings_size+embeddings_size, enc_state_size/2, self.model)
        self.ENC_RNN_B.set_dropout(dropout)
        
        self.DEC_RNN = dy.LSTMBuilder(dec_layers, enc_state_size+output_embedding_size+2, dec_state_size, self.model)
        self.DEC_RNN.set_dropout(dropout)
        
        # attention weights
        self.attention_w1 = self.model.add_parameters((enc_state_size, enc_state_size))
        self.attention_w2 = self.model.add_parameters((enc_state_size, dec_state_size))
        self.attention_v = self.model.add_parameters((1, enc_state_size))

        
        self.copy_w1 = self.model.add_parameters((enc_state_size, enc_state_size))
        self.copy_w2 = self.model.add_parameters((enc_state_size, dec_state_size))
        self.copy_v = self.model.add_parameters((1, enc_state_size))

        # project the rnn output to a vector of VOCAB_SIZE length
        self.mode_w = self.model.add_parameters((2, dec_state_size))
        self.mode_b = self.model.add_parameters((2))
        
        # project the rnn output to a vector of VOCAB_SIZE length
        self.output_w = self.model.add_parameters((vocab_size, dec_state_size))
        self.output_b = self.model.add_parameters((vocab_size))
        
        self.enc_state_size = enc_state_size
        self.type_embeddings_size = type_embeddings_size
        self.embeddings_size = embeddings_size
        self.output_embedding_size = output_embedding_size
        
    def load_weights(self,weight_file):
        self.model.populate(weight_file)
        
    def _run_rnn(self, init_state, input_vecs):
        s = init_state

        states = s.add_inputs(input_vecs)
        rnn_outputs = [s.output() for s in states]
        return rnn_outputs
    def _encode_string(self, embedded_string,RNN):
        initial_state = RNN.initial_state()

        # run_rnn returns all the hidden state of all the slices of the RNN
        hidden_states = self._run_rnn(initial_state, embedded_string)

        return hidden_states
    
    def _attend(self, input_vectors, state, w1, w2,v ):
        attention_weights = []

        w2dt = w2 * state.h()[-1]
        for input_vector in input_vectors:
            
            attention_weight = v * dy.tanh(w1 * input_vector + w2dt)
            attention_weights.append(attention_weight)
        attention_weights = dy.softmax(dy.concatenate(attention_weights))

        output_vectors = dy.esum(
            [vector * attention_weight for vector, attention_weight in zip(input_vectors, attention_weights)])
        return output_vectors, attention_weights
   
    def _embed(self,pos,pos_embed):
        return [pos_embed[p] for p in pos]
    
    def get_probs(self, w, b, rnn):
        return w*rnn+b
    

In [4]:
copy_network = CopyNetwork(3, 3,8, 1024, 1024, 1024, len(o2i),len(o2i),dropout = 0.25)
copy_network.load_weights('Conversationalist_3_3_1024_1024_1024_elmo_embedded_training_data_merged_130k_0_1.model')

In [5]:
if easy:
    elmo = ElmoEmbedder()
else:
    elmo = ElmoEmbedder(options_file='~/DownloadedModels/Elmo/elmo_2x4096_512_2048cnn_2xhighway_5.5B_options.json',
                        weight_file='~/DownloadedModels/Elmo/elmo_2x4096_512_2048cnn_2xhighway_5.5B_weights.hdf5',
                        cuda_device=1)

In [6]:
import random
import numpy
i2o = {i:o for o,i in o2i.items()}


def translate(model,input_string,type_string,embedded_string,max_len=30,eos='[[CLS]]'):
        dy.renew_cg()
        numbered = [is_number(s) for s in input_string]
        embedded_string = embedded_string[2]
        embedded_string = [dy.inputTensor(e) for e in embedded_string]
        
        
        embedded_string = [dy.concatenate([dy.inputTensor(np.array([n])),
                                           e,
                                           model.type_embeddings[type_o2i[t]]]) for n,e,t in zip(numbered,
                                                                                                embedded_string,
                                                                                                type_string)]
        encoded_string_f = model._encode_string(embedded_string,model.ENC_RNN_F)
        encoded_string_b = model._encode_string(list(reversed(embedded_string)),model.ENC_RNN_B)
        encoded_string = [dy.concatenate([f,b]) for f,b in zip(encoded_string_f,reversed(encoded_string_b))]

        mode_probs = []
        gen_probs = []
        copy_probs = []
        
        losses = []
        generate = random.random() < 0.01
        output = []
        prev_mode = 0
        prev_tok = o2i['<START>']
        rnn_state = model.DEC_RNN.initial_state().add_input(
                dy.vecInput(model.enc_state_size+model.output_embedding_size+2)
            )
        for _ in range(max_len):
            attended_encoding,_ = model._attend(encoded_string, rnn_state, 
                                                       model.attention_w1, model.attention_w2,model.attention_v)
            _,p_copy = model._attend(encoded_string, rnn_state, 
                                                       model.copy_w1, model.copy_w2,model.copy_v)
            
            mode_vec = np.zeros(2)
            mode_vec[prev_mode] = 1
            rnn_input = dy.concatenate([attended_encoding,
                                        model.output_embeddings[prev_tok],
                                        dy.inputTensor(mode_vec)
                                       ])
            
            
            
            rnn_state = rnn_state.add_input(rnn_input)
            
            p_mode = model.get_probs(model.mode_w,model.mode_b,rnn_state.output())
            
            p_gen = model.get_probs(model.output_w,model.output_b,rnn_state.output())
            
            p_mode = p_mode.value()
            p_copy = p_copy.value()
            p_gen = p_gen.value()
            
            
            if p_mode[0] > p_mode[1]:
                prev_mode = 0
                prev_tok = p_gen.index(max(p_gen))
                output.append(i2o[p_gen.index(max(p_gen))])
            else:
                prev_mode = 1
                prev_tok = 0
                output.append(input_string[p_copy.index(max(p_copy))])
            if output[-1] == eos:
                break
        return output

In [53]:
def set_dropout(model,dropout):
    model.DEC_RNN.set_dropout(dropout)
    model.ENC_RNN_F.set_dropout(dropout)
    model.ENC_RNN_B.set_dropout(dropout)
    
    
set_dropout(copy_network,0)

def translate_sample(model,input_string,type_string,embedded_string,mode_temp=1.0,gen_temp=1.0,copy_temp=1.0,max_len=30,eos='[[CLS]]'):
        dy.renew_cg()
        numbered = [is_number(s) for s in input_string]
        embedded_string = embedded_string[2]
        embedded_string = [dy.inputTensor(e) for e in embedded_string]
        
        
        embedded_string = [dy.concatenate([dy.inputTensor(np.array([n])),
                                           e,
                                           model.type_embeddings[type_o2i[t]]]) for n,e,t in zip(numbered,
                                                                                                embedded_string,
                                                                                                type_string)]
        encoded_string_f = model._encode_string(embedded_string,model.ENC_RNN_F)
        encoded_string_b = model._encode_string(list(reversed(embedded_string)),model.ENC_RNN_B)
        encoded_string = [dy.concatenate([f,b]) for f,b in zip(encoded_string_f,reversed(encoded_string_b))]

        mode_probs = []
        gen_probs = []
        copy_probs = []
        
        losses = []
        generate = random.random() < 0.01
        output = []
        prev_mode = 0
        prev_tok = o2i['<START>']
        rnn_state = model.DEC_RNN.initial_state().add_input(
                dy.vecInput(model.enc_state_size+model.output_embedding_size+2)
            )
        probs = []
        for _ in range(max_len):
            attended_encoding,_ = model._attend(encoded_string, rnn_state, 
                                                       model.attention_w1, model.attention_w2,model.attention_v)
            _,p_copy = model._attend(encoded_string, rnn_state, 
                                                       model.copy_w1, model.copy_w2,model.copy_v)
            
            mode_vec = np.zeros(2)
            mode_vec[prev_mode] = 1
            rnn_input = dy.concatenate([attended_encoding,
                                        model.output_embeddings[prev_tok],
                                        dy.inputTensor(mode_vec)
                                       ])
            
            rnn_state = rnn_state.add_input(rnn_input)
            
            p_mode = model.get_probs(model.mode_w,model.mode_b,rnn_state.output())
            if mode_temp != 0:
                mode_prob = dy.softmax(p_mode/mode_temp).value()
                prev_mode = np.argmax(np.random.multinomial(1,mode_prob))
            else:
                mode_prob = dy.softmax(p_mode).value()
                prev_mode = np.argmax(mode_prob)
            
            if prev_mode == 0:
                
                p_gen = model.get_probs(model.output_w,model.output_b,rnn_state.output())
                orig_prob = dy.softmax(p_gen).value()
                if gen_temp != 0:
                    gen_prob = dy.softmax(p_gen/gen_temp).value()
                
                    gen_prob = gen_prob/np.sum(gen_prob)
                    prev_tok = np.argmax(np.random.multinomial(1,gen_prob))
                else:
                    prev_tok = np.argmax(orig_prob)
                orig_prob =orig_prob/np.sum(orig_prob)
                output.append(i2o[prev_tok])
                gen_prob = orig_prob[prev_tok]
            else:
                
                orig_prob = p_copy.value()             
                orig_prob = orig_prob/np.sum(orig_prob)
                if gen_temp != 0:
                    gen_prob = dy.softmax(p_copy/copy_temp).value()                
                    gen_prob = gen_prob/np.sum(gen_prob)
                    copy_tok = np.argmax(np.random.multinomial(1,gen_prob))
                else:
                    copy_tok = np.argmax(orig_prob)          
                               
                prev_tok = 0
                output.append(input_string[copy_tok])
                gen_prob= orig_prob[copy_tok]
            if output[-1] == eos:
                break
            probs.append(mode_prob[prev_mode])
            probs.append(gen_prob)
        return output,probs
    
    
    

In [54]:
import random
import numpy
i2o = {i:o for o,i in o2i.items()}


def translate_beam_search(model,input_string,type_string,embedded_string,beam_width=3,max_len=30,eos='[[CLS]]'):
        dy.renew_cg()
        numbered = [is_number(s) for s in input_string]
        embedded_string = embedded_string[2]
        embedded_string = [dy.inputTensor(e) for e in embedded_string]
        
        
        embedded_string = [dy.concatenate([dy.inputTensor(np.array([n])),
                                           e,
                                           model.type_embeddings[type_o2i[t]]]) for n,e,t in zip(numbered,
                                                                                                embedded_string,
                                                                                                type_string)]
        encoded_string_f = model._encode_string(embedded_string,model.ENC_RNN_F)
        encoded_string_b = model._encode_string(list(reversed(embedded_string)),model.ENC_RNN_B)
        encoded_string = [dy.concatenate([f,b]) for f,b in zip(encoded_string_f,reversed(encoded_string_b))]

        mode_probs = []
        gen_probs = []
        copy_probs = []
        
        losses = []
        generate = random.random() < 0.01
        output = []
        prev_mode = 0
        prev_tok = o2i['<START>']
        rnn_state = model.DEC_RNN.initial_state().add_input(
                dy.vecInput(model.enc_state_size+model.output_embedding_size+2)
            )
        
        beams = [(0,0,[],rnn_state,prev_mode,prev_tok)]
        for _ in range(max_len):
            potentials = []
            for score,sum_score,output, rnn_state, prev_mode,prev_tok in beams:
                
                if prev_tok == o2i[eos]:
                    potentials.append((score,sum_score,
                                    output,rnn_state,prev_mode,prev_tok))
                    continue
                    
                attended_encoding,_ = model._attend(encoded_string, rnn_state, 
                                                           model.attention_w1, model.attention_w2,model.attention_v)
                _,p_copy = model._attend(encoded_string, rnn_state, 
                                                           model.copy_w1, model.copy_w2,model.copy_v)

                mode_vec = np.zeros(2)
                mode_vec[prev_mode] = 1
                rnn_input = dy.concatenate([attended_encoding,
                                            model.output_embeddings[prev_tok],
                                            dy.inputTensor(mode_vec)
                                           ])



                rnn_state = rnn_state.add_input(rnn_input)

                p_mode = model.get_probs(model.mode_w,model.mode_b,rnn_state.output())

                p_gen = model.get_probs(model.output_w,model.output_b,rnn_state.output())

                p_mode = dy.softmax(p_mode).value()
                p_copy = p_copy.value()
                p_gen = dy.softmax(p_gen).value()
                options = []
                
                
                probs = [(p,i) for i,p in enumerate(p_gen)]
                probs = sorted(probs,reverse=True)[:beam_width]
                
                for p,i in probs:
                    sum_score_ = sum_score+np.log(p_mode[0])+np.log(p)
                    score = sum_score_/(len(output)+1)
                    options.append((score,sum_score_,
                                    output + [i2o[i]],rnn_state,0,i))

                probs = [(p,i) for i,p in enumerate(p_copy)]
                probs = sorted(probs,reverse=True)[:beam_width]

                for p,i in probs:
                    sum_score_ = sum_score+np.log(p_mode[1])+np.log(p)
                    score = sum_score_/(len(output)+1)
                    options.append((score,sum_score_,
                                    output + [input_string[i]],rnn_state,1,0))
                
                
                options = sorted(options,reverse=True)
                options = options[:beam_width]
                potentials += options
                
            beams = sorted(potentials,reverse=True)[:beam_width]
            
                    
        return beams

In [55]:
def prepare_sentence(sentence):
    type_string = ['$FROM_FNAME', '$FROM_LNAME', '$FROM_EMAIL', '$TO_FNAME', '$TO_LNAME', '$TO_EMAIL', 
                   '$YEAR', '$MONTH', '$DAY', '$HOUR', '$MINUTE', '$GAP']

    type_string = type_string + ['$BODY']*(len(sentence)-len(type_string))
    embedded = elmo.embed_sentence(sentence)
    return type_string,embedded



sentence =['James', 'Ancelet', 'MjIDf@tCoTs.com', 'Patricia', 'Tikalsky', 'zleDT@PBRam.com', 
           '2019', '8', '10', '8', '1', '$N/A', 
           'I','need','you','to',
           'buy', '15', '$', '50', 'iTunes', 'gift', 'cards', 'for', 'me', 'ASAP', '.',
           'I','need','you','to', 'do','this'
           '\n', '-', 'James', ]

type_string, embedded = prepare_sentence(sentence)

beams = translate_beam_search(copy_network,sentence,type_string,embedded,
                      beam_width=10,max_len=200,eos='[[CLS]]')
print(beams[0])

(-0.010838810535067855, -0.4877464740780535, ['[[$Move:]]', '[[initial_attack]]', '[[$Move:]]', '[[introduce_task]]', '[[$ObligationPushed:]]', 'zleDT@PBRam.com', '$&', '[[answer_are_you_available]]', '[[$ObligationPushed:]]', 'zleDT@PBRam.com', '$&', '[[answer_are_you_available_for_urgent_task]]', '[[$ObligationPushed:]]', 'zleDT@PBRam.com', '$&', '[[answer_are_you_available_to_pick_up_gift_cards]]', '[[$ObligationPushed:]]', 'zleDT@PBRam.com', '$&', '[[provide_information]]', '[[$KEY:]]', '[[scam.gift_cards.brand]]', '[[$VALUE:]]', 'iTunes', '[[$KEY:]]', '[[scam.gift_cards.denomination]]', '[[$VALUE:]]', '$', '50', '[[$KEY:]]', '[[scam.gift_cards.introduced]]', '[[$VALUE:]]', 'True', '[[$KEY:]]', '[[scam.gift_cards.number]]', '[[$VALUE:]]', '15', '[[$KEY:]]', '[[status.on_the_go]]', '[[$VALUE:]]', 'True', '[[$KEY:]]', '[[status.on_the_go]]', '[[$VALUE:]]', '[[CLS]]'], <_dynet.RNNState object at 0x7f35784a2458>, 0, 10)


In [56]:
#Sample and take the best
best_score = -np.inf
best = None
for _ in range(20):
    output,probs = translate_sample(copy_network,  sentence,type_string,
                                    embedded,
                                    gen_temp=0.1,
                                    mode_temp=0.1,
                                    copy_temp=0.1,max_len=200)
    score = np.sum(np.log(probs))/len(output)
    if score > best_score:
        best_score = score
        best = output
print(output)
print(best_score)

#Greedy Decoding
output,probs = translate_sample(copy_network,  sentence,type_string,
                                embedded,
                                gen_temp=0,
                                mode_temp=0,
                                copy_temp=0,max_len=200)
score = np.sum(np.log(probs))/len(output)
print(output)
print(score)

print('The closer the score is to 0, the better')

['[[$Move:]]', '[[initial_attack]]', '[[$Move:]]', '[[introduce_task]]', '[[$ObligationPushed:]]', 'zleDT@PBRam.com', '$&', '[[answer_are_you_available]]', '[[$ObligationPushed:]]', 'zleDT@PBRam.com', '$&', '[[answer_are_you_available_for_urgent_task]]', '[[$ObligationPushed:]]', 'zleDT@PBRam.com', '$&', '[[answer_are_you_available_to_pick_up_gift_cards]]', '[[$ObligationPushed:]]', 'zleDT@PBRam.com', '$&', '[[provide_information]]', '[[$KEY:]]', '[[scam.gift_cards.brand]]', '[[$VALUE:]]', 'iTunes', '[[$KEY:]]', '[[scam.gift_cards.denomination]]', '[[$VALUE:]]', '$', '50', '[[$KEY:]]', '[[scam.gift_cards.introduced]]', '[[$VALUE:]]', 'True', '[[$KEY:]]', '[[scam.gift_cards.number]]', '[[$VALUE:]]', '15', '[[$KEY:]]', '[[status.on_the_go]]', '[[$VALUE:]]', 'True', '[[$KEY:]]', '[[status.on_the_go]]', '[[$VALUE:]]', '[[CLS]]']
-0.007844569707767682
['[[$Move:]]', '[[initial_attack]]', '[[$Move:]]', '[[introduce_task]]', '[[$ObligationPushed:]]', 'zleDT@PBRam.com', '$&', '[[answer_are_you

In [11]:
a = [0,-1,-2,-3]
print(sorted(a,reverse=True))

[0, -1, -2, -3]
