In [None]:
%matplotlib inline
import random
import _dynet
import sys 
import numpy as np
# Inject DyNet's device flag into argv so that from_args() below picks it up.
sys.argv += ['--dynet-devices', 'GPU:0']

dyparams = _dynet.DynetParams()

# Read the (just-extended) command line arguments.
dyparams.from_args()

# Manual overrides (see DyNet's command line arguments documentation).
# NOTE(review): the memory figure is presumably in MB — confirm against the docs.
dyparams.set_mem(5 * 2048)
dyparams.set_random_seed(666)

# Initialise with the parameters above; the notebook imports `dynet` only
# after this call.
dyparams.init()  # or init_from_params(dyparams)

import dynet as dy

In [None]:
import pickle
data_file = 'elmo_embedded_training_data_merged_130k_0_1.pkl'
# SECURITY: pickle.load can execute arbitrary code embedded in the file; only
# load data files that come from a trusted source.
# The context manager closes the file handle as soon as the data is loaded.
with open(data_file, 'rb') as data_handle:
    embedded_data = pickle.load(data_handle)


In [None]:
# Collect every output token and every token-type label seen in the data.
vocab = set()
type_vocab = set()
for thread in embedded_data:
    # thread[0] holds the (message, token types, output) triples of the thread.
    # The middle element is deliberately NOT called `type`: at notebook scope
    # that would replace the builtin for every later cell.
    for message, token_types, output in thread[0]:
        type_vocab.update(token_types)
        vocab.update(output)

In [None]:
# Only [[bracketed]] symbols (plus the explicit whitelist) stay in the
# generation vocabulary; everything else is removed from it.
safe = ['$&']
to_remove = {v for v in vocab if '[[' not in v and v not in safe}
vocab -= to_remove

# Special tokens that must always be generatable.
vocab |= {'<START>', '!', '<UNK>', 'True', 'False'}

# Index maps in both directions, built from one sorted copy per vocabulary.
sorted_vocab = sorted(vocab)
o2i = {o: i for i, o in enumerate(sorted_vocab)}
i2o = dict(enumerate(sorted_vocab))

sorted_types = sorted(type_vocab)
type_o2i = {o: i for i, o in enumerate(sorted_types)}
type_i2o = dict(enumerate(sorted_types))

In [None]:
# Quick check of how the two '$' symbols order relative to each other.
print(sorted(['$_', '$&']))

In [None]:
def _split_output_token(token):
    """Split one raw output token into its non-empty lines.

    Literal backslash-n sequences are first turned into real newlines, then the
    token is cut at every newline and empty pieces are discarded.
    """
    return [piece for piece in token.replace('\\n', '\n').split('\n') if piece]


def _encode_output(message, output):
    """Convert one message's target tokens into decoder supervision.

    Returns three parallel lists ``(gen, gc, cop)``, one entry per target token:

    * token in ``o2i``            -> gc 0, gen = its vocabulary index, cop 0
    * token absent from message  -> gc 0, gen = index of '<UNK>', cop 0
    * otherwise (copyable)       -> gc 1, gen 0, cop = first position of the
                                     token in ``message``
    """
    gen, gc, cop = [], [], []
    for raw_token in output:
        for token in _split_output_token(raw_token):
            if token in o2i:
                gc.append(0)
                gen.append(o2i[token])
                cop.append(0)
            elif token not in message:
                gc.append(0)
                gen.append(o2i['<UNK>'])
                cop.append(0)
            else:
                gc.append(1)
                gen.append(0)
                cop.append(message.index(token))
    return gen, gc, cop


all_data = []
copy_data = []
for thread in embedded_data:
    text_thread = thread[0]
    thread_gen = []
    thread_gc = []
    thread_cop = []
    # The middle element is not named `type` so the builtin stays usable.
    for message, token_types, output in text_thread:
        gen, gc, cop = _encode_output(message, output)
        thread_gen.append(gen)
        thread_gc.append(gc)
        thread_cop.append(cop)
    # One tuple per message:
    # (tokens, token types, raw output, thread[1] entry, gen, gc, cop)
    all_data.append(list(zip([m[0] for m in text_thread],
                             [m[1] for m in text_thread],
                             [m[2] for m in text_thread],
                             thread[1],
                             thread_gen, thread_gc, thread_cop)))

In [None]:
import sys
def is_number(s):
    """Return the numeric value of token ``s``, or 0.0 when it has none.

    Despite the name this returns a float, not a bool: the value is used as a
    numeric input feature for the encoder.

    Args:
        s: the token (normally a string).

    Returns:
        ``float(s)`` when ``s`` parses as a finite number, otherwise 0.0.
        Non-finite results ('nan', 'inf', 'Infinity', or digit strings so long
        that they overflow to inf) are mapped to 0.0 so that they cannot poison
        the network with NaN/inf inputs.
    """
    import math  # local import keeps this cell self-contained

    try:
        value = float(s)
    except (TypeError, ValueError):
        # TypeError covers non-string, non-numeric tokens such as None.
        return 0.0
    return value if math.isfinite(value) else 0.0

class CopyNetwork:
    """Encoder/decoder network whose decoder either generates or copies a token.

    The encoder is a pair of LSTMs (forward and backward) run over per-token
    feature vectors.  At every decoding step the decoder LSTM produces

    * a 2-way mode score (index 0 = generate, index 1 = copy),
    * scores over the output vocabulary, and
    * attention weights over the encoded input positions, used as the copy
      distribution.

    Relies on notebook globals: ``dy`` (DyNet), ``np``, ``random``, ``sys``,
    ``o2i`` / ``i2o`` (output vocabulary maps), ``type_o2i`` and ``is_number``.
    """

    def __init__(self, enc_layers, dec_layers, type_embeddings_size,embeddings_size, 
                 enc_state_size, dec_state_size,output_embedding_size, vocab_size, dropout = 0.1):
        """Create all parameters.

        Args:
            enc_layers: number of layers of each encoder LSTM.
            dec_layers: number of layers of the decoder LSTM.
            type_embeddings_size: size of the learned token-type embedding.
            embeddings_size: size of the pre-computed token embedding.
            enc_state_size: size of the concatenated (forward + backward)
                encoder state; should be even because each direction gets half.
            dec_state_size: decoder LSTM state size.
            output_embedding_size: size of the embedding of the previous output
                token fed back into the decoder.
            vocab_size: number of generatable output tokens.
            dropout: dropout rate set on all three LSTM builders.
        """
        self.model = dy.Model()
        self.type_embeddings = self.model.add_lookup_parameters((len(type_o2i), type_embeddings_size))
        self.output_embeddings = self.model.add_lookup_parameters((vocab_size, output_embedding_size))

        # Encoder input per token: [numeric value, history index,
        #                           token embedding, type embedding]
        enc_input_size = 2 + type_embeddings_size + embeddings_size
        # Floor division: under Python 3 `/` would hand a float to the builder.
        enc_half_size = enc_state_size // 2
        self.ENC_RNN_F = dy.LSTMBuilder(enc_layers, enc_input_size, enc_half_size, self.model)
        self.ENC_RNN_F.set_dropout(dropout)
        self.ENC_RNN_B = dy.LSTMBuilder(enc_layers, enc_input_size, enc_half_size, self.model)
        self.ENC_RNN_B.set_dropout(dropout)

        # Decoder input: [attended encoding, previous token embedding, one-hot previous mode]
        self.DEC_RNN = dy.LSTMBuilder(dec_layers, enc_state_size+output_embedding_size+2, dec_state_size, self.model)
        self.DEC_RNN.set_dropout(dropout)

        # attention weights (context fed back into the decoder)
        self.attention_w1 = self.model.add_parameters((enc_state_size, enc_state_size))
        self.attention_w2 = self.model.add_parameters((enc_state_size, dec_state_size))
        self.attention_v = self.model.add_parameters((1, enc_state_size))

        # a second, separate attention whose weights act as the copy distribution
        self.copy_w1 = self.model.add_parameters((enc_state_size, enc_state_size))
        self.copy_w2 = self.model.add_parameters((enc_state_size, dec_state_size))
        self.copy_v = self.model.add_parameters((1, enc_state_size))

        # project the decoder output to the 2 mode scores (generate / copy)
        self.mode_w = self.model.add_parameters((2, dec_state_size))
        self.mode_b = self.model.add_parameters((2))

        # project the decoder output to a vector of vocab_size scores
        self.output_w = self.model.add_parameters((vocab_size, dec_state_size))
        self.output_b = self.model.add_parameters((vocab_size))

        self.enc_state_size = enc_state_size
        self.type_embeddings_size = type_embeddings_size
        self.embeddings_size = embeddings_size
        self.output_embedding_size = output_embedding_size

    def _run_rnn(self, init_state, input_vecs):
        """Feed ``input_vecs`` through an RNN and return the output at every step."""
        states = init_state.add_inputs(input_vecs)
        return [state.output() for state in states]

    def _encode_string(self, embedded_string,RNN):
        """Run ``RNN`` from its initial state over ``embedded_string``.

        Returns the list of per-position outputs (one per input vector).
        """
        return self._run_rnn(RNN.initial_state(), embedded_string)

    def _attend(self, input_vectors, state, w1, w2,v ):
        """Additive attention of the decoder ``state`` over ``input_vectors``.

        Each position is scored with ``v * tanh(w1 * input + w2 * h)`` where
        ``h`` is the last element of ``state.h()``.

        Returns:
            ``(context, weights)``: the weighted sum of the input vectors and
            the softmax-normalised weights, one per input position.
        """
        # The decoder-side term does not depend on the position: compute it once.
        w2dt = w2 * state.h()[-1]
        scores = [v * dy.tanh(w1 * input_vector + w2dt) for input_vector in input_vectors]
        attention_weights = dy.softmax(dy.concatenate(scores))

        output_vectors = dy.esum(
            [vector * attention_weight for vector, attention_weight in zip(input_vectors, attention_weights)])
        return output_vectors, attention_weights

    def _embed(self,pos,pos_embed):
        """Look up every index of ``pos`` in ``pos_embed``."""
        return [pos_embed[p] for p in pos]

    def get_probs(self, w, b, rnn):
        """Affine projection ``w*rnn+b``.

        Despite the name the result is unnormalised; callers apply the softmax
        themselves.
        """
        return w*rnn+b

    def get_loss(self, history_string,input_string,type_string,embedded_string,output_modes, output_generate, output_copy,
                 dropout=0.1,teacher_forcing_=lambda : True,copy_loss_modifier=1.0):
        """Build the training loss for one (input, target) pair.

        Args:
            history_string: one history index per input token.
            input_string: the input tokens.
            type_string: one type label per input token (keys of ``type_o2i``).
            embedded_string: one embedding row per input token.
            output_modes: per target step, 0 = generate, 1 = copy.
            output_generate: per target step, vocabulary index to generate.
            output_copy: per target step, input position to copy (0 on
                generate steps).
            dropout: NOTE the direction — a '$BODY' token that is not a copy
                target is KEPT with this probability and removed otherwise.
                Tokens of any other type and copy targets are always kept.
            teacher_forcing_: accepted for interface compatibility but not
                consulted; the decoder is always fed the gold previous
                mode/token.
            copy_loss_modifier: multiplier applied to the copy loss terms.

        Returns:
            A list of DyNet expressions, two per target step: the mode loss
            followed by the generate loss or the copy loss for that step.

        Side effects:
            With probability 0.01 the greedy prediction for this example is
            printed next to the input.
        """
        # ---- 1. Randomly thin out the input ------------------------------
        copy_targets = set(output_copy)  # O(1) membership test per token
        kept_tokens = []
        kept_vectors = []
        kept_numbers = []
        kept_types = []
        kept_history = []
        dropped = []
        for i,(s,t,e,h) in enumerate(zip(input_string,type_string,embedded_string,history_string)):
            if random.random()<dropout or i in copy_targets or t != '$BODY':
                kept_tokens.append(s)
                kept_vectors.append(dy.inputTensor(e))
                kept_numbers.append(is_number(s))
                kept_types.append(t)
                kept_history.append(h)
            else:
                dropped.append(i)

        # ---- 2. Shift copy targets left by the number of tokens removed
        #         in front of them (0 entries are left untouched) -----------
        shifted_copy = []
        for d in output_copy:
            if d != 0:
                d -= sum(1 for d_i in dropped if d_i < d)
            shifted_copy.append(d)
        output_copy = shifted_copy

        # ---- 3. Encode the surviving tokens in both directions -----------
        encoder_inputs = [dy.concatenate([dy.inputTensor(np.array([n])),
                                          dy.inputTensor(np.array([h])),
                                          e,
                                          self.type_embeddings[type_o2i[t]]])
                          for n,h,e,t in zip(kept_numbers, kept_history, kept_vectors, kept_types)]
        encoded_string_f = self._encode_string(encoder_inputs,self.ENC_RNN_F)
        encoded_string_b = self._encode_string(list(reversed(encoder_inputs)),self.ENC_RNN_B)
        encoded_string = [dy.concatenate([f,b]) for f,b in zip(encoded_string_f,reversed(encoded_string_b))]

        # ---- 4. Decode with teacher forcing -------------------------------
        losses = []
        generate = random.random() < 0.01  # occasionally print a sample
        output = []

        prev_mode = 0
        prev_tok = o2i['<START>']
        rnn_state = self.DEC_RNN.initial_state().add_input(
                dy.vecInput(self.enc_state_size+self.output_embedding_size+2)
            )
        for mode,gen,copy in zip(output_modes,output_generate,output_copy):
            attended_encoding,_ = self._attend(encoded_string, rnn_state,
                                               self.attention_w1, self.attention_w2,self.attention_v)
            _,p_copy = self._attend(encoded_string, rnn_state,
                                    self.copy_w1, self.copy_w2,self.copy_v)

            mode_vec = np.zeros(2)
            mode_vec[prev_mode] = 1
            rnn_input = dy.concatenate([attended_encoding,
                                        self.output_embeddings[prev_tok],
                                        dy.inputTensor(mode_vec)
                                       ])
            rnn_state = rnn_state.add_input(rnn_input)

            p_mode = self.get_probs(self.mode_w,self.mode_b,rnn_state.output())
            p_gen = self.get_probs(self.output_w,self.output_b,rnn_state.output())

            # Teacher forcing: the next step always sees the gold mode/token.
            prev_mode = mode
            prev_tok = gen

            losses.append(dy.pickneglogsoftmax(p_mode, mode))
            if mode == 0:
                losses.append(dy.pickneglogsoftmax(p_gen,gen))
            else:
                # NOTE(review): p_copy is already the softmax output of
                # _attend and pickneglogsoftmax normalises again — confirm
                # that this double normalisation is intended.
                losses.append(dy.pickneglogsoftmax(p_copy,copy)*copy_loss_modifier)

            if generate:
                mode_scores = p_mode.value()
                if mode_scores[0] > mode_scores[1]:
                    gen_scores = p_gen.value()
                    output.append(i2o[gen_scores.index(max(gen_scores))])
                else:
                    # Copy positions index the KEPT tokens, not the original
                    # (un-thinned) input_string.
                    copy_scores = p_copy.value()
                    output.append(kept_tokens[copy_scores.index(max(copy_scores))])
        if generate:
            print('IN:',' '.join(input_string))
            print('OUT:',' '.join(output))
            sys.stdout.flush()
        return losses
    

In [None]:
# Model: 3-layer encoder/decoder with 1024-wide states.  The output-token
# embedding is as wide as the output vocabulary itself.
copy_network = CopyNetwork(
    enc_layers=3, dec_layers=3,
    type_embeddings_size=8, embeddings_size=1024,
    enc_state_size=1024, dec_state_size=1024,
    output_embedding_size=len(o2i), vocab_size=len(o2i),
    dropout=0.35,
)
lr = 0.01
trainer = dy.AdamTrainer(copy_network.model, alpha=lr)

# Shuffle the threads in place, then use the first 70% for training and the
# rest for evaluation.
random.shuffle(all_data)
split = int(0.7 * len(all_data))
all_data_training, all_data_eval = all_data[:split], all_data[split:]


In [None]:
from tqdm import tqdm_notebook
# Settings for this training run.
history_length = 2   # how many of the most recent messages run_thread encodes
best_eval = np.inf   # lowest mean evaluation loss seen so far
i = 0                # progress counter used by the training loop

lr = 5e-05
trainer.learning_rate = lr

def run_thread(thread,update):
    """Run the model over every message of one thread, optionally training.

    For each message the encoder input is the concatenation of the newest
    ``history_length`` messages seen so far, newest first; each token is
    tagged with its distance from the current message (0 = current message).

    Uses the notebook globals ``copy_network``, ``trainer``, ``dy``, ``np``,
    ``random`` and ``history_length``.

    Args:
        thread: list of message tuples
            ``(tokens, token types, raw output, embedding, gen, gOrC, cop)``.
        update: when true, back-propagate and apply a trainer update after
            every message.

    Returns:
        One value per message that has at least one target token: the summed
        loss divided by ``2 * len(gOrC)`` (two loss terms per target step).
        Messages without target tokens are still added to the history but
        produce no entry, since there is no loss to compute for them.
    """
    history = []
    all_loss = []
    for message in thread:
        dy.renew_cg()
        history.append(message)

        gen, gOrC, cop = message[4], message[5], message[6]
        if len(gOrC) == 0:
            # No targets: esum([]) / division by zero would fail below.
            continue

        inputs = []
        types = []
        embeddings = []
        histories = []
        for h,m in enumerate(reversed(history[-history_length:])):
            input_string,type_string, _, embedded_string,_,_,_ = m
            inputs += input_string
            # zip truncates the type list to the number of tokens
            types += [t for t,_ in zip(type_string,input_string)]
            # NOTE(review): index 2 of the stored embedding is presumably the
            # top ELMo layer — confirm against the data preparation code.
            embeddings.append(embedded_string[2])
            histories += [h]*len(input_string)
        embeddings = np.vstack(embeddings)

        loss = copy_network.get_loss(histories,inputs,types,embeddings, gOrC,gen,cop,
                                     dropout=0.3+random.random()*0.7,
                                     teacher_forcing_=lambda : random.random()<0.15,copy_loss_modifier=1.0)
        loss = dy.esum(loss)
        if update:
            loss.backward()
            trainer.update()
        all_loss.append(loss.value()/(2.0*len(gOrC)))
    return all_loss
import matplotlib.pyplot as plt

for epoch in tqdm_notebook(range(500)):
    print(f'EPOCH {epoch}')

    # ---- training pass ----------------------------------------------------
    random.shuffle(all_data_training)
    average_training = []
    i = 0
    for thread in tqdm_notebook(all_data_training):
        thread_losses = run_thread(thread,True)
        average_training += thread_losses
        # Count only the messages processed by this thread, so that the running
        # mean is printed roughly every 100 messages.
        i += len(thread_losses)

        if i > 100:
            i -= 100
            print(np.mean(average_training))
            sys.stdout.flush()

    # ---- evaluation pass (no parameter updates) ---------------------------
    average_eval = []
    for thread in tqdm_notebook(all_data_eval):
        average_eval += run_thread(thread,False)

    # Checkpoint only when the evaluation loss actually improves, and remember
    # the new best so later epochs are compared against it.
    eval_loss = np.mean(average_eval)
    if eval_loss < best_eval:
        best_eval = eval_loss
        print('BEST: ',eval_loss)
        copy_network.model.save(f'Conversationalist_3_3_1024_512_1024_{history_length}_{data_file.split(".")[0]}.model')

    print('TRAINING: ',np.mean(average_training))
    print('EVAL: ',eval_loss)
    plt.plot(average_training)
    plt.show()
    plt.plot(average_eval)
    plt.show()

    # Exponential learning-rate decay, applied after every epoch.
    lr = lr *0.97
    print('decaying ', lr)
    trainer.learning_rate = lr


In [None]:
IN: Joshua Jakeway siSTh@GlkOv.com William Minge VtgrL@STldb.com 2019 4 17 14 15 202 50 , right . 

 - Joshua 

 Sent from my iPhone William Minge VtgrL@STldb.com Joshua Jakeway siSTh@GlkOv.com 2019 4 17 14 11 179 50 gift cards , right ? Just want to confirm . 

 - William 

 Sent from my iPhone Joshua Jakeway siSTh@GlkOv.com William Minge VtgrL@STldb.com 2019 4 17 14 8 158 Pay using the company card . 

 - Joshua 

 Sent from my iPhone
OUT: [[$Move:]] [[confirm_gift_card_number]] [[$Move:]] [[provide_guidance]] [[$Move:]] [[provide_information]] [[$ObligationPushed:]] VtgrL@STldb.com $& [[provide_update]] [[$KEY:]] [[status.on_the_go]] [[$VALUE:]] True [[$KEY:]] [[status.on_the_go]] [[$VALUE:]] [[CLS]]

In [None]:
from tqdm import tqdm_notebook
# Settings for this training run.
history_length = 3   # how many of the most recent messages are encoded
best_eval = np.inf   # lowest mean evaluation loss seen so far
i = 0                # running message counter used for progress printing

lr = 5e-05
trainer.learning_rate = lr

def run_thread(thread,update):
    """Run the model over every message of one thread, optionally training.

    This cell redefines ``run_thread``; an identical definition appears in an
    earlier cell of the notebook.

    For each message the encoder input is the concatenation of the newest
    ``history_length`` messages seen so far, newest first; each token is
    tagged with its distance from the current message (0 = current message).

    Uses the notebook globals ``copy_network``, ``trainer``, ``dy``, ``np``,
    ``random`` and ``history_length``.

    Args:
        thread: list of message tuples
            ``(tokens, token types, raw output, embedding, gen, gOrC, cop)``.
        update: when true, back-propagate and apply a trainer update after
            every message.

    Returns:
        One value per message that has at least one target token: the summed
        loss divided by ``2 * len(gOrC)`` (two loss terms per target step).
        Messages without target tokens are still added to the history but
        produce no entry, since there is no loss to compute for them.
    """
    history = []
    all_loss = []
    for message in thread:
        dy.renew_cg()
        history.append(message)

        gen, gOrC, cop = message[4], message[5], message[6]
        if len(gOrC) == 0:
            # No targets: esum([]) / division by zero would fail below.
            continue

        inputs = []
        types = []
        embeddings = []
        histories = []
        for h,m in enumerate(reversed(history[-history_length:])):
            input_string,type_string, _, embedded_string,_,_,_ = m
            inputs += input_string
            # zip truncates the type list to the number of tokens
            types += [t for t,_ in zip(type_string,input_string)]
            # NOTE(review): index 2 of the stored embedding is presumably the
            # top ELMo layer — confirm against the data preparation code.
            embeddings.append(embedded_string[2])
            histories += [h]*len(input_string)
        embeddings = np.vstack(embeddings)

        loss = copy_network.get_loss(histories,inputs,types,embeddings, gOrC,gen,cop,
                                     dropout=0.3+random.random()*0.7,
                                     teacher_forcing_=lambda : random.random()<0.15,copy_loss_modifier=1.0)
        loss = dy.esum(loss)
        if update:
            loss.backward()
            trainer.update()
        all_loss.append(loss.value()/(2.0*len(gOrC)))
    return all_loss
import matplotlib.pyplot as plt


def _gather_context(history):
    """Build the encoder inputs from the newest ``history_length`` messages.

    Messages are concatenated newest first and every token is tagged with its
    distance from the current message (0 = current message).

    Returns ``(histories, inputs, types, embeddings)``.
    """
    inputs, types, embeddings, histories = [], [], [], []
    for h, m in enumerate(reversed(history[-history_length:])):
        input_string, type_string, _, embedded_string, _, _, _ = m
        inputs += input_string
        # zip truncates the type list to the number of tokens
        types += [t for t, _ in zip(type_string, input_string)]
        # NOTE(review): index 2 of the stored embedding is presumably the top
        # ELMo layer — confirm against the data preparation code.
        embeddings.append(embedded_string[2])
        histories += [h] * len(input_string)
    return histories, inputs, types, np.vstack(embeddings)


for epoch in  tqdm_notebook(range(500)):
    print(f'EPOCH {epoch}')

    # ---- training pass ----------------------------------------------------
    random.shuffle(all_data_training)
    average_training = []
    for thread in tqdm_notebook(all_data_training):
        history = []
        for message in thread:
            dy.renew_cg()
            history.append(message)
            i += 1

            # message layout:
            # (tokens, token types, raw output, embedding, gen, gOrC, cop)
            I,type_string, output_string, embedded_string,gen,gOrC, cop = message
            if len(gOrC) == 0:
                # nothing to predict for this message
                continue
            histories, inputs, types, embeddings = _gather_context(history)

            loss = copy_network.get_loss(histories,inputs,types,embeddings, gOrC,gen,cop,
                                         dropout=0.3+random.random()*0.7,
                                         teacher_forcing_=lambda : random.random()<0.15,copy_loss_modifier=1.0)
            loss = dy.esum(loss)
            loss.backward()
            trainer.update()
            average_training.append(loss.value()/(2.0*len(gOrC)))
            if i % 100 == 0:
                print(np.mean(average_training))
                sys.stdout.flush()

    # ---- evaluation pass (no parameter updates) ---------------------------
    average_eval = []
    for thread in tqdm_notebook(all_data_eval):
        history = []
        for message in thread:
            dy.renew_cg()
            # Append, exactly as in training: _gather_context takes the LAST
            # history_length entries, so inserting at the front would make it
            # encode the oldest messages and drop the current one.
            history.append(message)
            i += 1

            I,T,_,E,gen,gOrC, cop = message
            if len(gOrC) == 0:
                continue
            histories, inputs, types, embeddings = _gather_context(history)

            loss = copy_network.get_loss(histories,inputs,types,embeddings, gOrC,gen,cop,
                                         dropout=0.3+random.random()*0.7,
                                         teacher_forcing_=lambda : random.random()<0.15,copy_loss_modifier=1.0)
            loss = dy.esum(loss)
            average_eval.append(loss.value()/(2.0*len(gOrC)))

    # Checkpoint only when the evaluation loss actually improves, and remember
    # the new best so later epochs are compared against it.
    eval_loss = np.mean(average_eval)
    if eval_loss < best_eval:
        best_eval = eval_loss
        print('BEST: ',eval_loss)
        copy_network.model.save(f'Conversationalist_3_3_1024_512_1024_{history_length}_{data_file.split(".")[0]}.model')
    print('TRAINING: ',np.mean(average_training))
    print('EVAL: ',eval_loss)
    plt.plot(average_training)
    plt.show()
    plt.plot(average_eval)
    plt.show()

    # Exponential learning-rate decay, applied after every epoch.
    lr = lr *0.97
    print('decaying ', lr)
    trainer.learning_rate = lr


In [None]:
# Debug dump of the values left over from the last processed message.
print(histories, inputs, types, embeddings, gOrC, gen, cop)
print(output_string)
# The three per-token sequences are expected to have the same length.
for sequence in (inputs, types, embeddings):
    print(len(sequence))


In [None]:
# Save the current parameters under a name derived from the data file's stem.
model_stem = data_file.split(".")[0]
copy_network.model.save(f'Conversationalist_3_3_1024_1024_1024_1_{model_stem}.model')

In [None]:
# Persist the vocabulary maps next to the model; the context manager makes
# sure the file is flushed and closed.
with open('vocab.pkl', 'wb') as vocab_file:
    pickle.dump((i2o, o2i, type_o2i, type_i2o), vocab_file)

In [None]:

from allennlp.commands.elmo import ElmoEmbedder
# Locally downloaded ELMo (5.5B) option and weight files.
elmo_options_path = '~/DownloadedModels/Elmo/elmo_2x4096_512_2048cnn_2xhighway_5.5B_options.json'
elmo_weights_path = '~/DownloadedModels/Elmo/elmo_2x4096_512_2048cnn_2xhighway_5.5B_weights.hdf5'
# The embedder is placed on CUDA device 1.
elmo = ElmoEmbedder(options_file=elmo_options_path,
                    weight_file=elmo_weights_path,
                    cuda_device=1)

In [None]:
import random
import numpy
# Inverse of o2i: vocabulary index -> output token.
i2o = {index: token for token, index in o2i.items()}


def translate(model,input_string,type_string,embedded_string,max_len=30,eos='[[CLS]]',history_string=None):
    """Greedily decode an output sequence for one input.

    Args:
        model: a CopyNetwork.
        input_string: the input tokens.
        type_string: one type label per input token (keys of ``type_o2i``).
        embedded_string: embedding of the input; index 2 of it is used, one
            row per token.
        max_len: maximum number of decoding steps.
        eos: decoding stops as soon as this token is produced.
        history_string: optional history index per input token; defaults to 0
            for every token (i.e. everything belongs to the current message).

    Returns:
        The list of produced tokens (including ``eos`` when it was reached).
    """
    dy.renew_cg()
    if history_string is None:
        history_string = [0] * len(input_string)
    numbered = [is_number(s) for s in input_string]
    token_vectors = [dy.inputTensor(e) for e in embedded_string[2]]

    # Same per-token layout as CopyNetwork.get_loss:
    # [numeric value, history index, token embedding, type embedding].
    # Without the history element the vectors would be one short of the
    # input width the encoder LSTMs are built with.
    encoder_inputs = [dy.concatenate([dy.inputTensor(np.array([n])),
                                      dy.inputTensor(np.array([h])),
                                      e,
                                      model.type_embeddings[type_o2i[t]]])
                      for n,h,e,t in zip(numbered, history_string, token_vectors, type_string)]
    encoded_string_f = model._encode_string(encoder_inputs,model.ENC_RNN_F)
    encoded_string_b = model._encode_string(list(reversed(encoder_inputs)),model.ENC_RNN_B)
    encoded_string = [dy.concatenate([f,b]) for f,b in zip(encoded_string_f,reversed(encoded_string_b))]

    output = []
    prev_mode = 0
    prev_tok = o2i['<START>']
    rnn_state = model.DEC_RNN.initial_state().add_input(
            dy.vecInput(model.enc_state_size+model.output_embedding_size+2)
        )
    for _ in range(max_len):
        attended_encoding,_ = model._attend(encoded_string, rnn_state,
                                            model.attention_w1, model.attention_w2,model.attention_v)
        _,p_copy = model._attend(encoded_string, rnn_state,
                                 model.copy_w1, model.copy_w2,model.copy_v)

        mode_vec = np.zeros(2)
        mode_vec[prev_mode] = 1
        rnn_input = dy.concatenate([attended_encoding,
                                    model.output_embeddings[prev_tok],
                                    dy.inputTensor(mode_vec)
                                   ])
        rnn_state = rnn_state.add_input(rnn_input)

        mode_scores = model.get_probs(model.mode_w,model.mode_b,rnn_state.output()).value()
        if mode_scores[0] > mode_scores[1]:
            # generate: take the highest scoring vocabulary entry
            gen_scores = model.get_probs(model.output_w,model.output_b,rnn_state.output()).value()
            prev_mode = 0
            prev_tok = gen_scores.index(max(gen_scores))
            output.append(i2o[prev_tok])
        else:
            # copy: take the input position with the largest copy weight; the
            # decoder is then fed token index 0, as in training
            copy_scores = p_copy.value()
            prev_mode = 1
            prev_tok = 0
            output.append(input_string[copy_scores.index(max(copy_scores))])
        if output[-1] == eos:
            break
    return output

In [None]:
import random
def set_dropout(model,dropout):
    """Set the same dropout rate on the decoder LSTM and both encoder LSTMs."""
    for rnn_name in ('DEC_RNN', 'ENC_RNN_F', 'ENC_RNN_B'):
        getattr(model, rnn_name).set_dropout(dropout)
    
    
# Switch LSTM dropout off before decoding.
set_dropout(copy_network, dropout=0)

def translate_sample(model,input_string,type_string,embedded_string,mode_temp=1.0,gen_temp=1.0,copy_temp=1.0,max_len=30,eos='[[CLS]]',history_string=None):
    """Decode one output sequence by temperature sampling.

    Args:
        model: a CopyNetwork.
        input_string: the input tokens.
        type_string: one type label per input token (keys of ``type_o2i``).
        embedded_string: embedding of the input; index 2 of it is used, one
            row per token.
        mode_temp: temperature for the generate/copy choice.
        gen_temp: temperature for sampling a vocabulary token.
        copy_temp: temperature for sampling a copy position.
            For all three temperatures, 0 means "take the argmax".
        max_len: maximum number of decoding steps.
        eos: decoding stops as soon as this token is produced.
        history_string: optional history index per input token; defaults to 0
            for every token (i.e. everything belongs to the current message).

    Returns:
        ``(output, probs)``: the produced tokens and, for every step that did
        not produce ``eos``, two probabilities — that of the chosen mode
        (under ``mode_temp``) and that of the chosen token/position (at
        temperature 1).
        NOTE(review): the step that emits ``eos`` contributes nothing to
        ``probs``, so a sequence that stops immediately has an empty list —
        confirm this is intended when ranking samples by their log-probability.
    """
    def _renormalise(values):
        # Guard against rounding: make the probabilities sum to exactly 1.
        arr = np.asarray(values, dtype=float)
        return arr / np.sum(arr)

    dy.renew_cg()
    if history_string is None:
        history_string = [0] * len(input_string)
    numbered = [is_number(s) for s in input_string]
    token_vectors = [dy.inputTensor(e) for e in embedded_string[2]]

    # Same per-token layout as CopyNetwork.get_loss:
    # [numeric value, history index, token embedding, type embedding].
    # Without the history element the vectors would be one short of the
    # input width the encoder LSTMs are built with.
    encoder_inputs = [dy.concatenate([dy.inputTensor(np.array([n])),
                                      dy.inputTensor(np.array([h])),
                                      e,
                                      model.type_embeddings[type_o2i[t]]])
                      for n,h,e,t in zip(numbered, history_string, token_vectors, type_string)]
    encoded_string_f = model._encode_string(encoder_inputs,model.ENC_RNN_F)
    encoded_string_b = model._encode_string(list(reversed(encoder_inputs)),model.ENC_RNN_B)
    encoded_string = [dy.concatenate([f,b]) for f,b in zip(encoded_string_f,reversed(encoded_string_b))]

    output = []
    probs = []
    prev_mode = 0
    prev_tok = o2i['<START>']
    rnn_state = model.DEC_RNN.initial_state().add_input(
            dy.vecInput(model.enc_state_size+model.output_embedding_size+2)
        )
    for _ in range(max_len):
        attended_encoding,_ = model._attend(encoded_string, rnn_state,
                                            model.attention_w1, model.attention_w2,model.attention_v)
        _,p_copy = model._attend(encoded_string, rnn_state,
                                 model.copy_w1, model.copy_w2,model.copy_v)

        mode_vec = np.zeros(2)
        mode_vec[prev_mode] = 1
        rnn_input = dy.concatenate([attended_encoding,
                                    model.output_embeddings[prev_tok],
                                    dy.inputTensor(mode_vec)
                                   ])
        rnn_state = rnn_state.add_input(rnn_input)

        # ---- choose the mode: 0 = generate, 1 = copy ----------------------
        p_mode = model.get_probs(model.mode_w,model.mode_b,rnn_state.output())
        if mode_temp != 0:
            mode_prob = dy.softmax(p_mode/mode_temp).value()
            prev_mode = int(np.argmax(np.random.multinomial(1,mode_prob)))
        else:
            mode_prob = dy.softmax(p_mode).value()
            prev_mode = int(np.argmax(mode_prob))

        if prev_mode == 0:
            # ---- generate a vocabulary token ------------------------------
            p_gen = model.get_probs(model.output_w,model.output_b,rnn_state.output())
            orig_prob = _renormalise(dy.softmax(p_gen).value())
            if gen_temp != 0:
                sample_prob = _renormalise(dy.softmax(p_gen/gen_temp).value())
                prev_tok = int(np.argmax(np.random.multinomial(1,sample_prob)))
            else:
                prev_tok = int(np.argmax(orig_prob))
            output.append(i2o[prev_tok])
            gen_prob = orig_prob[prev_tok]
        else:
            # ---- copy an input token --------------------------------------
            orig_prob = _renormalise(dy.softmax(p_copy).value())
            # The copy branch is governed by copy_temp (not gen_temp).
            if copy_temp != 0:
                sample_prob = _renormalise(dy.softmax(p_copy/copy_temp).value())
                copy_tok = int(np.argmax(np.random.multinomial(1,sample_prob)))
            else:
                copy_tok = int(np.argmax(orig_prob))
            # after a copy the decoder is fed token index 0, as in training
            prev_tok = 0
            output.append(input_string[copy_tok])
            gen_prob = orig_prob[copy_tok]

        if output[-1] == eos:
            break
        probs.append(mode_prob[prev_mode])
        probs.append(gen_prob)
    return output,probs
# Type labels of the 12 header fields that precede the message body.
type_string = ['$FROM_FNAME', '$FROM_LNAME', '$FROM_EMAIL', '$TO_FNAME', '$TO_LNAME', '$TO_EMAIL', '$YEAR', '$MONTH', '$DAY', '$HOUR', '$MINUTE', '$GAP']

# First example message (replaced by the second example below).
sentence = ['John', 'Davin', 'LWmow@SMAqB.com', 'Steven', 'Smalley', 'oFDvG@kIrPz.com', '2019', '10', '9', '7', '43', '$N/A',
            'I', 'hope', 'you', "'re", 'available', 'at', 'the', 'moment', 'for', 'a', 'task', '(', 'urgent', ')', '?',
            '\n\n', 'Let', 'me', 'know', 'either', 'way', 'as', 'soon', 'as', 'possible', '.',
            'I', 'urgently', 'need', 'you', 'to', 'buy', 'some', 'gift', 'cards', 'for', 'me', 'ASAP', '.', 
            'Please', 'confirm', 'that', 'you', "'re", 'on', 'this', '.', '\n\n', '-', 'John', '\n\n', 'Sent', 'from', 'my', 'iPhone']

# Second example message: the one actually decoded below.
sentence =['James', 'Ancelet', 'MjIDf@tCoTs.com', 'Patricia', 'Tikalsky', 'zleDT@PBRam.com', 
           '2019', '8', '10', '8', '1', '$N/A', 
          # 'Do', 'you', 'happen', 'to', 'be', 'available', 'immediately', 'to', 'do', 'something', 'for', 'me',
          # '(', 'it', "'s", 'urgent', ')', '?', 
          # 'I', "'m", 'on', 'jury', 'duty', 'and', 'ca', "n't", 'talk', 'on', 'the', 'phone', ',', 
          # 'so', 'I', 'can', 'only', 'be', 'reached', 'via', 'email', '.',
           'Go',
           'buy', '15', '$', '50', 'iTunes', 'gift', 'cards', 'for', 'me', 'ASAP', '.',
           # The comma after 'this' matters: without it Python joins the two
           # adjacent literals into the single token 'this\n'.
           'I','need','you','to', 'do','this',
           '\n', '-', 'James', ]

  
def prepare_sentence(sentence):
    """Return ``(type_string, embedded)`` for a tokenised message.

    The type list starts with the 12 header labels and is padded with '$BODY'
    up to the length of ``sentence``.  ``embedded`` is whatever the notebook's
    global ``elmo`` embedder returns for the tokens.
    """
    header_types = ['$FROM_FNAME', '$FROM_LNAME', '$FROM_EMAIL',
                    '$TO_FNAME', '$TO_LNAME', '$TO_EMAIL',
                    '$YEAR', '$MONTH', '$DAY', '$HOUR', '$MINUTE', '$GAP']
    body_count = len(sentence) - len(header_types)
    type_string = header_types + body_count * ['$BODY']
    return type_string, elmo.embed_sentence(sentence)




type_string, embedded = prepare_sentence(sentence)

# Draw 20 low-temperature samples and keep the one with the highest
# length-normalised log-probability.
best_score = -np.inf
best = None
for _ in range(20):
    output,probs = translate_sample(copy_network,  sentence,type_string,
                                    embedded,
                                    gen_temp=0.1,
                                    mode_temp=0.1,
                                    copy_temp=0.1,max_len=200)
    score = np.sum(np.log(probs))/len(output)
    if score > best_score:
        best_score = score
        best = output
# Show the best sample, not merely the last one drawn.
print(best)
print(best_score)

# Greedy decoding (all temperatures 0) for comparison.
output,probs = translate_sample(copy_network,  sentence,type_string,
                                embedded,
                                gen_temp=0,
                                mode_temp=0,
                                copy_temp=0,max_len=200)
score = np.sum(np.log(probs))/len(output)
print(output)
print(score)

In [None]:
import random
import numpy
# Inverse of o2i: vocabulary index -> output token.
i2o = {index: token for token, index in o2i.items()}


def translate_beam_search(model,input_string,type_string,embedded_string,beam_width=3,max_len=30,eos='[[CLS]]'):
    """Decode ``input_string`` with beam search over the generate/copy decoder.

    At every step each unfinished hypothesis is extended with the
    ``beam_width`` most probable vocabulary tokens (mode 0, "generate") and
    the ``beam_width`` most probable input positions (mode 1, "copy").  The
    running log-probability of a hypothesis is the sum over its steps of
    ``log p(mode) + log p(token | mode)``; hypotheses are ranked by that sum
    divided by the number of output tokens.

    Args:
        model: network object providing the encoder/decoder RNNs, the
            attention and copy parameters, the embeddings and ``get_probs``
            used below.
        input_string: list of input tokens; copy steps emit these verbatim.
        type_string: type tag per input token, looked up in ``type_o2i``.
        embedded_string: indexable whose element ``[2]`` yields one vector
            per input token (presumably the top ELMo layer -- TODO confirm).
        beam_width: number of hypotheses kept after every step, and number
            of candidates taken from each of the two distributions.
        max_len: maximum number of decoding steps.
        eos: vocabulary token that finishes a hypothesis once generated.

    Returns:
        A list of at most ``beam_width`` tuples
        ``(score, sum_score, output, rnn_state, prev_mode, prev_tok)``,
        best first, where ``output`` is the list of decoded tokens.

    Raises:
        KeyError: if ``eos`` or ``'<START>'`` is not in ``o2i``, or a type
            tag is not in ``type_o2i``.
    """
    dy.renew_cg()
    numbered = [is_number(s) for s in input_string]
    token_vectors = [dy.inputTensor(e) for e in embedded_string[2]]

    # Encoder input per token: [is-number flag, embedding, type embedding].
    encoder_inputs = [dy.concatenate([dy.inputTensor(np.array([n])),
                                      e,
                                      model.type_embeddings[type_o2i[t]]])
                      for n, e, t in zip(numbered, token_vectors, type_string)]
    encoded_f = model._encode_string(encoder_inputs, model.ENC_RNN_F)
    encoded_b = model._encode_string(list(reversed(encoder_inputs)), model.ENC_RNN_B)
    encoded_string = [dy.concatenate([f, b]) for f, b in zip(encoded_f, reversed(encoded_b))]

    def rank(hypothesis):
        # Rank on the two numeric fields only.  Sorting whole tuples would,
        # on a tie, fall through to comparing token lists and finally RNN
        # state objects, which are not orderable.
        return hypothesis[:2]

    def top_k(distribution):
        # (probability, index) pairs of the beam_width most probable entries.
        return sorted(((p, i) for i, p in enumerate(distribution)), reverse=True)[:beam_width]

    eos_id = o2i[eos]
    start_state = model.DEC_RNN.initial_state().add_input(
        dy.vecInput(model.enc_state_size+model.output_embedding_size+2)
    )

    beams = [(0, 0, [], start_state, 0, o2i['<START>'])]
    for _step in range(max_len):
        potentials = []
        for score, sum_score, output, rnn_state, prev_mode, prev_tok in beams:

            # Finished hypotheses are carried forward unchanged.  Copy steps
            # record prev_tok as 0, so only a *generated* eos is detected here.
            if prev_tok == eos_id:
                potentials.append((score, sum_score, output, rnn_state, prev_mode, prev_tok))
                continue

            attended_encoding, _unused = model._attend(encoded_string, rnn_state,
                                                       model.attention_w1, model.attention_w2, model.attention_v)
            _unused, p_copy = model._attend(encoded_string, rnn_state,
                                            model.copy_w1, model.copy_w2, model.copy_v)

            # One-hot encoding of the mode used for the previous token.
            mode_vec = np.zeros(2)
            mode_vec[prev_mode] = 1
            next_state = rnn_state.add_input(dy.concatenate([attended_encoding,
                                                             model.output_embeddings[prev_tok],
                                                             dy.inputTensor(mode_vec)]))
            decoder_output = next_state.output()

            p_mode = dy.softmax(model.get_probs(model.mode_w, model.mode_b, decoder_output)).value()
            p_gen = dy.softmax(model.get_probs(model.output_w, model.output_b, decoder_output)).value()
            p_copy = p_copy.value()

            new_len = len(output) + 1
            options = []

            # Mode 0: emit a vocabulary token.
            for p, i in top_k(p_gen):
                new_sum = sum_score + np.log(p_mode[0]) + np.log(p)
                options.append((new_sum / new_len, new_sum,
                                output + [i2o[i]], next_state, 0, i))

            # Mode 1: copy the input token at position i.
            for p, i in top_k(p_copy):
                new_sum = sum_score + np.log(p_mode[1]) + np.log(p)
                options.append((new_sum / new_len, new_sum,
                                output + [input_string[i]], next_state, 1, 0))

            potentials += sorted(options, key=rank, reverse=True)[:beam_width]

        beams = sorted(potentials, key=rank, reverse=True)[:beam_width]

        # Once every surviving hypothesis has finished, further steps would
        # only re-sort the same list.
        if all(hypothesis[5] == eos_id for hypothesis in beams):
            break

    return beams
    
def prepare_sentence(sentence):
    """Return the type tags and the embedding for a tokenised message.

    Tokens 0-11 receive the fixed header tags listed below; each further
    token receives ``'$BODY'``.  With 12 tokens or fewer, the 12 header
    tags are returned as they are.

    NOTE(review): an identical ``prepare_sentence`` is already defined in the
    previous cell; this definition silently replaces it.

    Args:
        sentence: list of token strings, header fields first.

    Returns:
        ``(type_string, embedded)``; ``embedded`` is the result of
        ``elmo.embed_sentence(sentence)`` (``elmo`` is defined elsewhere in
        the notebook).
    """
    sender_tags = ['$FROM_FNAME', '$FROM_LNAME', '$FROM_EMAIL']
    recipient_tags = ['$TO_FNAME', '$TO_LNAME', '$TO_EMAIL']
    time_tags = ['$YEAR', '$MONTH', '$DAY', '$HOUR', '$MINUTE', '$GAP']
    header_tags = sender_tags + recipient_tags + time_tags

    # max(..., 0) mirrors list multiplication by a negative count (empty list).
    body_tags = ['$BODY' for _ in range(max(len(sentence) - len(header_tags), 0))]
    type_string = header_tags + body_tags
    embedded = elmo.embed_sentence(sentence)
    return type_string, embedded



# Probe message for the beam-search decoder: 12 header tokens (sender and
# recipient names and e-mails, date and time fields, gap) followed by the body.
sentence =['James', 'Ancelet', 'MjIDf@tCoTs.com', 'Patricia', 'Tikalsky', 'zleDT@PBRam.com', 
           '2019', '8', '10', '8', '1', '$N/A', 
           'Go',
           'buy', '15', '$', '50', 'iTunes', 'gift', 'cards', 'for', 'me', 'ASAP', '.',
           # The trailing comma matters: without it Python concatenates the
           # adjacent literals 'this' and '\n' into the single token 'this\n'.
           'I','need','you','to', 'do','this',
           '\n', '-', 'James', ]

# Tag and embed the probe message, then decode it with a beam of width 5.
type_string, embedded = prepare_sentence(sentence)

beams = translate_beam_search(
    copy_network,
    sentence,
    type_string,
    embedded,
    beam_width=5,
    max_len=200,
    eos='[[CLS]]',
)

# One hypothesis tuple per line, in the order the search returned them.
for b in beams:
    print(b)
    

In [None]:

def _nucleus_sample(distribution, nucleus, label=None):
    """Sample one index from the top-probability "nucleus" of ``distribution``.

    Entries are taken in decreasing probability until their accumulated mass
    exceeds ``nucleus``; the kept entries are renormalised and one is drawn
    with ``np.random.multinomial``.  When ``label`` is given and more than
    one entry was kept, the renormalised probabilities are printed.
    """
    ranked = sorted(((p, i) for i, p in enumerate(distribution)), reverse=True)
    kept_probs = []
    kept_indices = []
    mass = 0
    for p, i in ranked:
        if mass > nucleus:
            break
        kept_probs.append(p)
        kept_indices.append(i)
        mass += p
    kept_probs = np.array(kept_probs)/mass
    if label is not None and len(kept_probs) > 1:
        print(label, kept_probs)
    return kept_indices[np.argmax(np.random.multinomial(1, kept_probs))]


def translate_nucleus_sample(model,input_string,type_string,embedded_string,mode_temp=1.0,gen_temp=1.0,copy_temp=1.0,nucleus=0.9,max_len=30,eos='[[CLS]]',verbose=False):
    """Decode ``input_string`` by sampling, with nucleus (top-p) filtering of tokens.

    Each step first picks a mode (0 = generate a vocabulary token, 1 = copy
    an input token) and then picks the token from that mode's distribution.
    A temperature of 0 selects the argmax; otherwise the distribution is
    sharpened/flattened by the temperature, and token choices are
    additionally restricted to the nucleus before sampling.

    Args:
        model: network object providing the encoder/decoder RNNs, the
            attention and copy parameters, the embeddings and ``get_probs``.
        input_string: list of input tokens; copy steps emit these verbatim.
        type_string: type tag per input token, looked up in ``type_o2i``.
        embedded_string: indexable whose element ``[2]`` yields one vector
            per input token (presumably the top ELMo layer -- TODO confirm).
        mode_temp: temperature for the generate/copy choice.
        gen_temp: temperature for vocabulary tokens.
        copy_temp: temperature for copied positions.  The copy branch is
            gated on this value; it was previously gated on ``gen_temp``,
            which produced NaNs for ``copy_temp=0`` and ignored
            ``copy_temp`` whenever ``gen_temp`` was 0.
        nucleus: probability mass that the kept candidates must exceed.
        max_len: maximum number of decoding steps.
        eos: output token that ends decoding.
        verbose: when true, print the renormalised nucleus each time it
            holds more than one candidate (prefixed ``Gen`` or ``Copy``).

    Returns:
        ``(output, probs)``.  ``output`` is the list of emitted tokens,
        including ``eos`` if it was produced.  ``probs`` holds, for every
        emitted token except a final ``eos``, the probability of the chosen
        mode followed by the probability of the chosen token.  Token
        probabilities come from the un-tempered distribution; the mode
        probability comes from the tempered one when ``mode_temp != 0``.
    """
    dy.renew_cg()
    numbered = [is_number(s) for s in input_string]
    token_vectors = [dy.inputTensor(e) for e in embedded_string[2]]

    # Encoder input per token: [is-number flag, embedding, type embedding].
    encoder_inputs = [dy.concatenate([dy.inputTensor(np.array([n])),
                                      e,
                                      model.type_embeddings[type_o2i[t]]])
                      for n, e, t in zip(numbered, token_vectors, type_string)]
    encoded_f = model._encode_string(encoder_inputs, model.ENC_RNN_F)
    encoded_b = model._encode_string(list(reversed(encoder_inputs)), model.ENC_RNN_B)
    encoded_string = [dy.concatenate([f, b]) for f, b in zip(encoded_f, reversed(encoded_b))]

    output = []
    probs = []
    prev_mode = 0
    prev_tok = o2i['<START>']
    rnn_state = model.DEC_RNN.initial_state().add_input(
        dy.vecInput(model.enc_state_size+model.output_embedding_size+2)
    )

    for _step in range(max_len):
        attended_encoding, _unused = model._attend(encoded_string, rnn_state,
                                                   model.attention_w1, model.attention_w2, model.attention_v)
        _unused, p_copy = model._attend(encoded_string, rnn_state,
                                        model.copy_w1, model.copy_w2, model.copy_v)

        # One-hot encoding of the mode used for the previous token.
        mode_vec = np.zeros(2)
        mode_vec[prev_mode] = 1
        rnn_state = rnn_state.add_input(dy.concatenate([attended_encoding,
                                                        model.output_embeddings[prev_tok],
                                                        dy.inputTensor(mode_vec)]))

        p_mode = model.get_probs(model.mode_w, model.mode_b, rnn_state.output())
        if mode_temp != 0:
            mode_prob = dy.softmax(p_mode/mode_temp).value()
            prev_mode = np.argmax(np.random.multinomial(1, mode_prob))
        else:
            mode_prob = dy.softmax(p_mode).value()
            prev_mode = np.argmax(mode_prob)

        if prev_mode == 0:
            # Generate a vocabulary token.
            p_gen = model.get_probs(model.output_w, model.output_b, rnn_state.output())
            orig_prob = np.array(dy.softmax(p_gen).value())
            if gen_temp != 0:
                tempered = np.array(dy.softmax(p_gen/gen_temp).value())
                tempered = tempered/np.sum(tempered)
                prev_tok = _nucleus_sample(tempered, nucleus, 'Gen' if verbose else None)
            else:
                prev_tok = np.argmax(orig_prob)

            orig_prob = orig_prob/np.sum(orig_prob)
            output.append(i2o[prev_tok])
            token_prob = orig_prob[prev_tok]
        else:
            # Copy a token from the input.
            orig_prob = np.array(p_copy.value())
            orig_prob = orig_prob/np.sum(orig_prob)
            if copy_temp != 0:
                # p_copy is already a probability vector, so the temperature
                # is applied in log space and the result renormalised.
                tempered = np.exp(np.log(np.array(p_copy.value()))/copy_temp)
                tempered = tempered/np.sum(tempered)
                copy_tok = _nucleus_sample(tempered, nucleus, 'Copy' if verbose else None)
            else:
                copy_tok = np.argmax(orig_prob)

            # Copied tokens have no vocabulary id; index 0 is fed to the decoder.
            prev_tok = 0
            output.append(input_string[copy_tok])
            token_prob = orig_prob[copy_tok]

        # The eos step is emitted but contributes nothing to probs.
        if output[-1] == eos:
            break
        probs.append(mode_prob[prev_mode])
        probs.append(token_prob)
    return output,probs
    
def set_dropout(model,dropout):
    """Set the same dropout value on the decoder RNN and on both encoder RNNs.

    Args:
        model: object with ``DEC_RNN``, ``ENC_RNN_F`` and ``ENC_RNN_B``
            attributes, each exposing ``set_dropout``.
        dropout: value forwarded unchanged to every ``set_dropout`` call.
    """
    # Decoder first, then forward and backward encoders.
    for rnn_attribute in ('DEC_RNN', 'ENC_RNN_F', 'ENC_RNN_B'):
        getattr(model, rnn_attribute).set_dropout(dropout)
    
    
# Switch dropout off before sampling.
set_dropout(copy_network,0)
    
# Draw 50 nucleus samples and keep the one whose summed log-probability,
# divided by the output length, is highest.
best_score = -np.inf
best = None
for _ in range(50):
    output, probs = translate_nucleus_sample(copy_network,sentence,type_string,embedded,
                      nucleus=0.9,max_len=200,eos='[[CLS]]')
    score = np.sum(np.log(probs))/len(output)
    if score > best_score:
        best_score = score
        best = output
# Print the best-scoring sample (not merely the last one drawn) so that the
# text shown matches the score printed below it.
print(best)
print(best_score)

In [None]:
#Thoughts on the data:
#It gets Bed Bath & Beyond but not Bed , Bath & Beyond -- make it more robust to spelling differences
#It also doesn't get Outback Steakhouse -- so maybe add more multi word gift card names

#It still seems to be overfitting
#e.g., 
#    'I','need','you','to',
#           'buy', '457', '$', '50', 'Bed','Bath','&','Beyond', 'gift', 'cards', 'for', 'me', 'ASAP', '.',
#           '\n', '-', 'James', '\n\n', 'Sent', 'from', 'my', 'iPhone']
# works but 
# 'Can', 'you', ...
# doesn't


# Show elements 0 and 2 of the first entry of the first item in all_data,
# one per line (all_data is built in an earlier cell).
print(all_data[0][0][0], all_data[0][0][2], sep='\n')

In [None]:
a = [1.3640618446714108e-11, 1.1083164132313719e-06, 1.8857741733250397e-11, 1.3196976318739576e-08, 1.225326221462253e-11, 9.422314555023724e-10, 1.1535195134986747e-07, 0.9999999999563177, 1.2778737349180107e-07, 3.751995254797341e-07, 3.333531359583222e-07, 5.723535090054632e-08, 1.2074775473289648e-08, 4.670662399492747e-09, 1.967804987128595e-10, 1.4175243208498506e-10, 9.109405480614336e-06, 1.8634605090886515e-10, 4.916721603106333e-11, 5.973859828538949e-08, 2.243133992714868e-07, 5.513519188965425e-07, 7.083992813975811e-08, 5.2531029590105224e-08, 2.608313174428328e-08, 7.00641815485009e-08, 2.122040500498444e-08, 3.8131691352691375e-08, 5.6287955942859065e-08, 2.2970600161179208e-07, 5.108544082773639e-08, 3.6798459722839858e-09, 2.0364517935446882e-10, 6.593096613977552e-10, 6.861888500118648e-09, 2.3426219695173565e-11, 1.5258417527711514e-06, 1.0125287163049123e-08, 1.4919654409278586e-07, 7.073738197225914e-10, 7.790397394555158e-09, 9.183113884100136e-09, 2.582737429272353e-09, 1.6410468824877222e-08, 1.5978417986581006e-09, 1.9619556344503147e-09, 1.3803174761553554e-07, 2.628892974755092e-10, 3.0569263001489684e-09, 6.579714757679253e-11, 1.0733344058081496e-08, 2.7581559028193095e-08, 3.1367170418628683e-08, 3.6238681373392987e-08, 2.290931031840497e-08, 5.303551534446905e-08, 5.598832892908799e-09, 1.7237956132790137e-08, 8.645185937551729e-09, 2.8855304021967843e-09, 8.095685031380281e-08, 2.327593806965199e-08, 1.0518557111171977e-08, 1.5701865969952374e-08, 1.4428753368635808e-08, 1.421675942920381e-07, 1.2294342667685022e-08, 1.058213138288223e-07, 1.4286517468852753e-08, 1.2074907280931212e-07, 1.0423292338858633e-08, 1.28734542848808e-08, 1.6960441538455674e-09, 6.553078583853081e-10, 8.345694639465493e-08]
# Convert the hard-coded list above to an array and print its total.
a = np.asarray(a)
print(a.sum())