In [None]:
# Notebook bootstrap: configure and initialise DyNet by hand before the
# high-level `dynet` module is imported at the bottom of this cell.
%matplotlib inline
import random
import _dynet
import sys 
# Inject the device selection into argv so that from_args() below picks it up.
# NOTE(review): re-running this cell appends the two flags again.
sys.argv.append('--dynet-devices')
sys.argv.append('GPU:0')
dyparams = _dynet.DynetParams()

# Fetch the command line arguments (optional)
dyparams.from_args()

# Set some parameters manually (see the command line arguments documentation)
# 2048*4 = 8192; presumably megabytes -- confirm against the DyNet documentation.
dyparams.set_mem(2048*4)
# Seeds DyNet only; Python's `random` module (imported above) is not seeded here.
dyparams.set_random_seed(666)
# Initialize with the given parameters
dyparams.init() # or init_from_params(dyparams)

# Imported only after init(); presumably required so the settings above take
# effect -- confirm against DyNet's manual-initialisation instructions.
import dynet as dy

In [None]:
import json
import nltk
import spacy
from tqdm import tqdm
import numpy as np
# Load the spaCy pipeline registered under the name 'en'.
# NOTE(review): `nlp` is not referenced by any of the visible cells -- confirm
# it is still needed before paying the load cost on every run.
nlp = spacy.load('en')


In [None]:
import pickle

# Pickled training threads; the stem of this file name is reused later to
# build the model checkpoint name.
data_file = 'elmo_embedded_training_data_merged_0_1.pkl'

# NOTE(security): unpickling can execute arbitrary code, so only load files
# that come from a trusted source.
# The context manager closes the handle as soon as loading finishes (the
# previous bare open() call left it open for the life of the kernel).
with open(data_file, 'rb') as data_handle:
    embedded_data = pickle.load(data_handle)


In [None]:
# Collect every output token and every type tag that occurs in the data.
# thread[0] is iterated as (message, type tags, output tokens) triples.
vocab = set()
type_vocab = set()
for thread in embedded_data:
    # The tags are bound to `token_types` rather than `type`, so the builtin
    # `type` is no longer shadowed for every later cell of the notebook.
    for message, token_types, output in thread[0]:
        # set.update() accepts any iterable, so no temporary set is needed.
        type_vocab.update(token_types)
        vocab.update(output)

In [None]:
# Restrict the output vocabulary to tokens that contain '[[' or that are
# explicitly whitelisted in `safe`; everything else is dropped.
safe = ['$&']
to_remove = {token for token in vocab if not ('[[' in token or token in safe)}
vocab.difference_update(to_remove)

# Special symbols that must always be part of the vocabulary.
vocab.update(['<UNK>', 'True', 'False'])

# Index maps in both directions, numbered by sorted order.
ordered_vocab = sorted(vocab)
o2i = {token: index for index, token in enumerate(ordered_vocab)}
i2o = dict(enumerate(ordered_vocab))

ordered_types = sorted(type_vocab)
type_o2i = {tag: index for index, tag in enumerate(ordered_types)}
type_i2o = dict(enumerate(ordered_types))

In [None]:
def _split_on_newlines(token):
    # Turn escaped newlines (backslash + 'n') into real ones, split on them and
    # drop the empty pieces. Splitting on single newlines and discarding
    # empties yields the same pieces as splitting on double, then single,
    # newlines.
    return [piece for piece in token.replace('\\n', '\n').split('\n') if piece]


def _encode_targets(message, output):
    # Encode one message's output tokens as three parallel lists:
    #   gen - vocabulary index to generate (0 when the token is copied)
    #   gc  - mode flag: 0 = generate, 1 = copy
    #   cop - position in `message` to copy from (0 when generating)
    gen, gc, cop = [], [], []
    for raw_token in output:
        for piece in _split_on_newlines(raw_token):
            if piece in o2i:
                mode, gen_index, copy_index = 0, o2i[piece], 0
            else:
                try:
                    # One scan instead of a membership test followed by index().
                    mode, gen_index, copy_index = 1, 0, message.index(piece)
                except ValueError:
                    # Neither in the vocabulary nor in the input: unknown token.
                    mode, gen_index, copy_index = 0, o2i['<UNK>'], 0
            gc.append(mode)
            gen.append(gen_index)
            cop.append(copy_index)
    return gen, gc, cop


all_data = []
copy_data = []
for thread in embedded_data:
    text_thread = thread[0]
    thread_gen, thread_gc, thread_cop = [], [], []
    # `token_types` (not `type`) so the builtin is not shadowed notebook-wide.
    for message, token_types, output in text_thread:
        gen, gc, cop = _encode_targets(message, output)
        thread_gen.append(gen)
        thread_gc.append(gc)
        thread_cop.append(cop)

    # One row per message:
    # (message, type tags, output, thread[1] entry, gen, gc, cop)
    all_data.append([
        (message, token_types, output, embedding, gen, gc, cop)
        for (message, token_types, output), embedding, gen, gc, cop
        in zip(text_thread, thread[1], thread_gen, thread_gc, thread_cop)
    ])

In [None]:
import sys


class CopyNetwork:
    """Attention decoder that generates a vocabulary token or copies an input token.

    At every output step the decoder attends over the encoded input, feeds the
    attended vector to an LSTM and produces three score vectors: a two-way
    mode choice (index 0 = generate, index 1 = copy), scores over the output
    vocabulary, and scores over the input positions.

    Relies on the notebook globals `dy`, `type_o2i` and `i2o`.
    """

    def __init__(self, enc_layers, dec_layers, type_embeddings_size,embeddings_size, 
                 enc_state_size, dec_state_size, vocab_size, dropout = 0.1):
        """Create all parameters.

        `enc_layers` is accepted but not used, and `enc_state_size` is only
        stored; no separate encoder RNN is built here.

        NOTE(review): parameters are created in the same order as before;
        presumably populate() relies on that order/naming to match saved
        files, so do not reorder -- confirm against the DyNet docs.
        """
        self.model = dy.Model()
        self.type_embeddings = self.model.add_lookup_parameters((len(type_o2i), type_embeddings_size))

        # Size of one encoded input token: token embedding + type embedding.
        input_size = type_embeddings_size + embeddings_size

        # the decoder rnn
        self.DEC_RNN = dy.LSTMBuilder(dec_layers, input_size, dec_state_size, self.model)
        self.DEC_RNN.set_dropout(dropout)

        # attention used to build the decoder input
        self.attention_w1 = self.model.add_parameters((input_size, input_size))
        self.attention_w2 = self.model.add_parameters((input_size, dec_state_size))
        self.attention_v = self.model.add_parameters((1, input_size))

        # second attention head whose scores select the input position to copy
        self.copy_w1 = self.model.add_parameters((input_size, input_size))
        self.copy_w2 = self.model.add_parameters((input_size, dec_state_size))
        self.copy_v = self.model.add_parameters((1, input_size))

        # project the rnn output to the two mode scores (generate / copy)
        self.mode_w = self.model.add_parameters((2, dec_state_size))
        self.mode_b = self.model.add_parameters((2))

        # project the rnn output to a vector of VOCAB_SIZE length
        self.output_w = self.model.add_parameters((vocab_size, dec_state_size))
        self.output_b = self.model.add_parameters((vocab_size))

        self.enc_state_size = enc_state_size
        self.type_embeddings_size = type_embeddings_size
        self.embeddings_size = embeddings_size

    def _run_rnn(self, init_state, input_vecs):
        """Feed `input_vecs` through the RNN and return the output of every step."""
        states = init_state.add_inputs(input_vecs)
        return [state.output() for state in states]

    def _encode_string(self, embedded_string,RNN):
        """Run `RNN` from its initial state over `embedded_string`; return all outputs."""
        return self._run_rnn(RNN.initial_state(), embedded_string)

    def _attention_scores(self, input_vectors, state, w1, w2, v):
        """Return the unnormalised attention score of every input vector.

        Each score is v * tanh(w1 * input + w2 * h), where h is the last
        element of state.h(); the scores are concatenated into one expression.
        """
        # The state projection is the same for every input, so compute it once.
        w2dt = w2 * state.h()[-1]
        return dy.concatenate(
            [v * dy.tanh(w1 * input_vector + w2dt) for input_vector in input_vectors])

    def _attend(self, input_vectors, state, w1, w2,v ):
        """Return (attention-weighted sum of the inputs, softmax attention weights)."""
        attention_weights = dy.softmax(
            self._attention_scores(input_vectors, state, w1, w2, v))

        output_vectors = dy.esum(
            [vector * attention_weight for vector, attention_weight in zip(input_vectors, attention_weights)])
        return output_vectors, attention_weights

    def _embed(self,pos,pos_embed):
        """Look up `pos_embed[p]` for every p in `pos`."""
        return [pos_embed[p] for p in pos]

    def get_probs(self, w, b, rnn):
        """Affine projection w*rnn+b; the result is unnormalised scores."""
        return w*rnn+b

    def get_loss(self,initial_state, input_string,type_string,embedded_string,output_modes, output_generate, output_copy):
        """Build the loss terms for one message.

        Args:
            initial_state: decoder state to continue from, or None to start a
                fresh sequence.
            input_string: input tokens (used for copying and for the sample print).
            type_string: one type tag per input token, looked up in `type_o2i`.
            embedded_string: token embeddings; only index 2 is used
                (presumably the top ELMo layer -- TODO confirm).
            output_modes: per output step, 0 = generate, anything else = copy.
            output_generate: per output step, target vocabulary index.
            output_copy: per output step, target input position.

        Returns:
            (losses, rnn_state): a list holding one mode loss plus one
            generate-or-copy loss per output step, and the final decoder state.

        About 1% of the calls also greedy-decode and print a sample.
        """
        embedded_string = [dy.inputTensor(e) for e in embedded_string[2]]
        encoded_string = [dy.concatenate([e,self.type_embeddings[type_o2i[t]]]) for e,t in zip(embedded_string,type_string)]

        if initial_state is None:
            # Prime a fresh decoder with a vecInput of the encoded-token size.
            rnn_state = self.DEC_RNN.initial_state().add_input(dy.vecInput(self.type_embeddings_size+self.embeddings_size))
        else:
            rnn_state = initial_state

        losses = []
        generate = random.random() < 0.01
        output = []
        for mode,gen,copy in zip(output_modes,output_generate,output_copy):
            # Both attention heads read the decoder state from BEFORE this step.
            attended_encoding,_ = self._attend(encoded_string, rnn_state, 
                                               self.attention_w1, self.attention_w2,self.attention_v)
            copy_scores = self._attention_scores(encoded_string, rnn_state,
                                                 self.copy_w1, self.copy_w2,self.copy_v)

            rnn_state = rnn_state.add_input(attended_encoding)
            rnn_output = rnn_state.output()

            mode_scores = self.get_probs(self.mode_w,self.mode_b,rnn_output)
            gen_scores = self.get_probs(self.output_w,self.output_b,rnn_output)

            losses.append(dy.pickneglogsoftmax(mode_scores, mode))
            if mode == 0:
                losses.append(dy.pickneglogsoftmax(gen_scores,gen))
            else:
                # pickneglogsoftmax applies the softmax itself, so it must be
                # given the raw scores. Passing the already-normalised weights
                # (as before) normalised the copy distribution twice.
                losses.append(dy.pickneglogsoftmax(copy_scores,copy))

            if generate:
                mode_values = mode_scores.value()
                if mode_values[0] > mode_values[1]:
                    gen_values = gen_scores.value()
                    output.append(i2o[gen_values.index(max(gen_values))])
                else:
                    # Softmax is monotonic, so the argmax of the raw scores is
                    # the argmax of the attention weights.
                    copy_values = copy_scores.value()
                    output.append(input_string[copy_values.index(max(copy_values))])
        if generate:
            print('IN:',' '.join(input_string))
            print('OUT:',' '.join(output))
            sys.stdout.flush()
        return losses,rnn_state
    

In [None]:
# Build the network, attach an Adam trainer, then load previously saved
# weights from the checkpoint named after the data file.
copy_network = CopyNetwork(
    enc_layers=3,
    dec_layers=3,
    type_embeddings_size=8,
    embeddings_size=1024,
    enc_state_size=1024,
    dec_state_size=1024,
    vocab_size=len(o2i),
    dropout=0.25,
)

lr = 0.01
trainer = dy.AdamTrainer(copy_network.model, alpha=lr)

checkpoint_path = f'Conversationalist_3_3_1024_1024_1024_History_{data_file.split(".")[0]}.model'
copy_network.model.populate(checkpoint_path)

In [None]:
from tqdm import tqdm_notebook

# Training: one computation graph and one parameter update per thread, with
# the decoder state carried from each message to the next inside the thread.
lr = 0.000005
trainer.learning_rate = lr
i = 0  # messages seen since the last progress print
for epoch in range(500):
    print(f'EPOCH {epoch}')
    average = []  # per-thread mean loss, reset every epoch
    random.shuffle(all_data)
    for thread in tqdm_notebook(all_data):

        dy.renew_cg()
        previous_state = None
        loss = []
        for message in thread:
            i += 1
            # Row layout: (input tokens, type tags, output tokens, embeddings,
            #              generate ids, mode flags, copy positions)
            input_string,type_string, output_string, embedded_string,gen,gOrC, cop = message
            loss_,previous_state = copy_network.get_loss(previous_state,input_string,type_string,embedded_string, gOrC,gen,cop)
            loss += loss_

        if not loss:
            # A thread with no messages or no output tokens yields no loss
            # terms; summing an empty list and dividing by zero would both
            # fail, so skip it.
            continue

        tot = len(loss)
        loss = dy.esum(loss)
        loss.backward()
        trainer.update()
        average.append(loss.value()/tot)
        if i > 100:
            # Running mean of the per-thread losses seen so far this epoch.
            print(np.mean(average))
            sys.stdout.flush()
            i -= 100

    # Decay the learning rate after every epoch.
    lr = lr *0.97
    print('decaying ', lr)
    trainer.learning_rate = lr


In [None]:
# Save the model parameters to a checkpoint whose name is derived from the
# stem of `data_file` (the text before the first '.').
copy_network.model.save(f'Conversationalist_3_3_1024_1024_1024_History_{data_file.split(".")[0]}.model')

In [None]:

from allennlp.commands.elmo import ElmoEmbedder

# ELMo embedder used to embed new sentences before decoding; the option and
# weight files are read from the paths below and the model is placed on
# CUDA device 1.
elmo_options_path = '~/DownloadedModels/Elmo/elmo_2x4096_512_2048cnn_2xhighway_5.5B_options.json'
elmo_weights_path = '~/DownloadedModels/Elmo/elmo_2x4096_512_2048cnn_2xhighway_5.5B_weights.hdf5'
elmo = ElmoEmbedder(
    options_file=elmo_options_path,
    weight_file=elmo_weights_path,
    cuda_device=1,
)

In [None]:
import random

# Inverse of o2i: maps each vocabulary index back to its output token.
i2o = dict((index, token) for token, index in o2i.items())


def translate(model,initial_state, input_string,type_string,embedded_string,max_len=30,eos='[[CLS]]',verbose=False):
    """Greedy-decode a reply for one input message.

    Args:
        model: network exposing DEC_RNN, type_embeddings, _attend, get_probs
            and the attention / copy / mode / output parameters.
        initial_state: decoder state to continue from, or None to start a
            fresh sequence.
        input_string: input tokens; copied tokens are taken from this list.
        type_string: one type tag per input token, looked up in `type_o2i`.
        embedded_string: token embeddings; only index 2 is used
            (presumably the top ELMo layer -- TODO confirm).
        max_len: maximum number of tokens to emit.
        eos: token that stops decoding once emitted (it is kept in the output).
        verbose: when True, print the two mode scores at every step.

    Returns:
        (output, rnn_state): the emitted tokens and the final decoder state.

    Side effect: dropout on model.DEC_RNN is switched off and stays off; call
    model.DEC_RNN.set_dropout(rate) again before resuming training.
    """
    if initial_state is None:
        # Only start a new computation graph for a fresh sequence: renewing it
        # unconditionally would invalidate a state passed in by the caller.
        dy.renew_cg()

    # Decoding should be deterministic, so turn off the training-time dropout.
    model.DEC_RNN.disable_dropout()

    token_vectors = [dy.inputTensor(e) for e in embedded_string[2]]
    encoded_string = [dy.concatenate([e,model.type_embeddings[type_o2i[t]]]) for e,t in zip(token_vectors,type_string)]

    if initial_state is None:
        rnn_state = model.DEC_RNN.initial_state().add_input(dy.vecInput(model.type_embeddings_size+model.embeddings_size))
    else:
        rnn_state = initial_state

    output = []
    for _step in range(max_len):
        # Both attention heads read the decoder state from BEFORE this step.
        attended_encoding,_ = model._attend(encoded_string, rnn_state,
                                            model.attention_w1, model.attention_w2,model.attention_v)
        _,p_copy = model._attend(encoded_string, rnn_state,
                                 model.copy_w1, model.copy_w2,model.copy_v)

        rnn_state = rnn_state.add_input(attended_encoding)
        rnn_output = rnn_state.output()

        p_mode = model.get_probs(model.mode_w,model.mode_b,rnn_output).value()
        p_gen = model.get_probs(model.output_w,model.output_b,rnn_output).value()
        p_copy = p_copy.value()

        if verbose:
            print(p_mode)

        # Mode index 0 = generate from the vocabulary, index 1 = copy from the input.
        if p_mode[0] > p_mode[1]:
            output.append(i2o[p_gen.index(max(p_gen))])
        else:
            output.append(input_string[p_copy.index(max(p_copy))])
        if output[-1] == eos:
            break
    return output,rnn_state

# Demo: decode a reply for a hand-written example message.
# The first twelve tokens are header fields, each with its own type tag.
header_types = ['$FROM_FNAME', '$FROM_LNAME', '$FROM_EMAIL',
                '$TO_FNAME', '$TO_LNAME', '$TO_EMAIL',
                '$YEAR', '$MONTH', '$DAY', '$HOUR', '$MINUTE', '$GAP']
header_tokens = ['John', 'Davin', 'LWmow@SMAqB.com',
                 'Steven', 'Smalley', 'oFDvG@kIrPz.com',
                 '2019', '10', '9', '7', '43', '$N/A']

# Short body: the one actually decoded below.
short_body = ['I', 'hope', 'you', "'re", 'available', 'at', 'the', 'moment',
              'for', 'a', 'task', '(', 'urgent', ')', '?']

# Longer variant of the same message, kept for experimentation (not used below).
long_body = short_body + [
    '\n\n', 'Let', 'me', 'know', 'either', 'way', 'as', 'soon', 'as', 'possible', '.',
    'I', 'urgently', 'need', 'you', 'to', 'buy', 'some', 'gift', 'cards', 'for', 'me', 'ASAP', '.',
    'Please', 'confirm', 'that', 'you', "'re", 'on', 'this', '.',
    '\n\n', '-', 'John', '\n\n', 'Sent', 'from', 'my', 'iPhone']

sentence = header_tokens + short_body

# Every token after the header is tagged as message body.
type_string = header_types + ['$BODY'] * (len(sentence) - len(header_types))

embedded = elmo.embed_sentence(sentence)
output, rnn_state = translate(copy_network, None, sentence, type_string, embedded)
print(output)

In [None]:
# Inspect the first prepared thread.
# NOTE(review): every row also carries its embeddings entry, so this output
# can be very large -- consider printing only selected fields.
print(all_data[0])