In [1]:
import math, time, os, datetime, shutil, pickle

import numpy as np

import torch
from torch import nn
import torch.nn.functional as F

import import_ipynb
from MoveData import *
from Elements import MultiHeadAttention, Norm, FeedForward
from Talk import *
from Trainer import *
from Talk import talk_to_chloe, get_synonym, string2tensor

importing Jupyter notebook from MoveData.ipynb
importing Jupyter notebook from Elements.ipynb
importing Jupyter notebook from Talk.ipynb
importing Jupyter notebook from EncoderDecoder.ipynb
importing Jupyter notebook from Trainer.ipynb


[nltk_data] Downloading package wordnet to /home/carson/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


## MemoryTransformer

In [22]:
# Gate for the demo cell that follows the class definition: when True, that cell
# builds the options, data tools and model, loads saved weights and prints one reply.
teaching = True

class MemoryTransformer(nn.Module):
    '''
    Encoder-decoder dialogue model with an extra attention step between the
    encoder and the decoder that mixes the encoder output with a memory tensor
    held on the model (self.current_memory).
    '''

    def __init__(self, in_vocab_size, out_vocab_size, emb_dim, n_layers, num_heads, mem_slots, dropout):
        '''
        input:
            in_vocab_size, out_vocab_size are the sizes of the input and output vocabularies
            emb_dim is the embedding width; it is also used as the memory width (self.mem_size)
            n_layers, num_heads and dropout are forwarded to the Encoder and Decoder
            mem_slots is forwarded to RelMemCore
        '''
        super().__init__()

        # plain (non-module) hyper-parameter attributes
        self.mem_slots, self.mem_size = mem_slots, emb_dim
        self.num_heads, self.dropout = num_heads, dropout
        # integer division: any remainder of emb_dim / num_heads is dropped
        self.dim_k = self.mem_size // self.num_heads

        # sub-modules are created in this exact order (encoder, memory core,
        # memory attention, decoder, output projection)
        self.encoder = Encoder(in_vocab_size, emb_dim, n_layers, num_heads, dropout)
        self.rmc = RelMemCore(mem_slots, mem_size=emb_dim, num_heads=num_heads)

        # memory for a single conversation (batch_size=1); kept as a plain attribute
        self.current_memory = self.rmc.initial_memory(batch_size=1)

        self.mem_encoder = MultiHeadAttention(num_heads, self.mem_size, self.dim_k, dropout)
        self.decoder = Decoder(out_vocab_size, emb_dim, n_layers, num_heads, dropout)
        self.out = nn.Linear(emb_dim, out_vocab_size)

    def forward(self, src_seq, trg_seq, src_mask, trg_mask):
        '''
        Encode src_seq, attend over the current memory, decode trg_seq and
        project to output-vocabulary logits.

        Note: self.current_memory is only read here, never updated.
        '''
        encoded = self.encoder(src_seq, src_mask)
        memory = self.current_memory
        # memory is passed as the 2nd and 3rd arguments (presumably key and value --
        # confirm against Elements.MultiHeadAttention); the returned scores are unused
        attended, _ = self.mem_encoder(encoded, memory, memory)
        return self.out(self.decoder(trg_seq, attended, src_mask, trg_mask))

In [23]:
if teaching:
    # run-time options for the demo; everything runs on CPU
    opt = Options(
        batchsize=2,
        device=torch.device("cpu"),
        epochs=25,
        lr=0.01,
        beam_width=3,
        max_len=25,
        save_path='../saved/weights/model_weights',
    )

    # unpack the iterator, the two vocabulary fields and the options object
    # returned by json2datatools for the saved listen/reply pairs
    data_iter, infield, outfield, opt = json2datatools(path='../saved/pairs.json', opt=opt)

    # model hyper-parameters (kept as notebook globals)
    emb_dim = 32
    n_layers = 3
    num_heads = 8
    mem_slots = 4
    dropout = 0.01
    chloe = MemoryTransformer(
        len(infield.vocab), len(outfield.vocab),
        emb_dim, n_layers, num_heads, mem_slots, dropout,
    )

    # initialise from the saved weights, then check the model still answers
    load_subset_weights(chloe, opt)
    print(talk_to_chloe("how?", chloe, opt, infield, outfield))

meowci beaucoup !


In [24]:
def _store_reply_in_memory(model, reply_sequence, encoding, input_mask, opt):
    '''Re-decode reply_sequence and write its last decoder vector into model.current_memory.'''
    # the mask always matches the length of the sequence being decoded
    reply_mask = nopeak_mask(size=reply_sequence.size(1), opt=opt)
    memory_sequence = model.decoder(reply_sequence, encoding, input_mask, reply_mask)
    memory_vector = memory_sequence[:, -1, :]  # last position is used as the summary
    model.current_memory = model.rmc.update_memory(memory_vector, model.current_memory)


def talk2model_MemRL(input_str, model, opt, infield, outfield):
    '''
    Sample a reply from the model one token at a time and store a summary of the
    exchange in the model's memory.

    input:
        input_str is a string, it is what you want to say to the dialogue model
        model is a model exposing encoder, mem_encoder, decoder, out, rmc and current_memory
        opt is an options object with the maximum length of the output sequence opt.max_len
        infield and outfield are the data.fields that store the vocabulary
    output:
        decoder_input, a LongTensor holding <sos> followed by every sampled token
            (including <eos> when the model produced it)
        de_str, the same tokens joined with spaces (the <sos>/<eos> markers are kept)
        logprobs, a float tensor with one NEGATIVE log-probability per sampled token

    Side effect: model.current_memory is replaced by
    model.rmc.update_memory(summary, model.current_memory), where summary is the
    decoder output at the final <eos> position (an <eos> is appended for this
    purpose only when the model did not produce one within opt.max_len steps).

    Note: this version assumes we are evaluating the model on CPU and a batch size of 1.
    Tokens are sampled, so the reply is not deterministic; autograd is left enabled.
    '''
    # imported locally because the notebook's import cell does not import Categorical explicitly
    from torch.distributions import Categorical

    input_sequence = string2tensor(input_str, infield)  # string to tensor
    input_mask = (input_sequence != infield.vocab.stoi['<pad>']).unsqueeze(-2)  # make input mask
    encoding = model.encoder(input_sequence, input_mask)  # use the encoder to re-represent the input
    # mix the encoding with the current memory; the attention scores are not needed here
    encoding, _ = model.mem_encoder(encoding, model.current_memory, model.current_memory)

    init_tok = outfield.vocab.stoi['<sos>']  # this is the integer for the start token
    decoder_input = torch.LongTensor([[init_tok]])  # use start token to initiate the decoder
    logprobs = torch.zeros((1, 0))  # grows by one column per sampled token

    # keep sampling the next token until the decoder outputs an end token or until max_len
    produced_eos = False
    for pos in range(opt.max_len):
        decoder_input_mask = nopeak_mask(size=pos + 1, opt=opt)  # pos+1 because pos starts at 0
        # the out vector contains the logits that are rebalanced by the softmax
        out = model.out(model.decoder(decoder_input, encoding, input_mask, decoder_input_mask))
        # softout is a categorical probability distribution over the output vocab
        softout = F.softmax(out, dim=-1)
        distr = Categorical(probs=softout)
        # only the sample for the newest position is kept
        action = distr.sample()[:, -1].unsqueeze(0)
        logprob = -distr.log_prob(action)[:, -1].unsqueeze(0)
        # concatenate that token to our running list of output tokens
        decoder_input = torch.cat((decoder_input, action), dim=1)
        logprobs = torch.cat((logprobs, logprob), dim=1)
        # if the model outputs an end of sentence token, it is done with this sentence
        if outfield.vocab.itos[action.item()] == '<eos>':
            produced_eos = True
            break

    if produced_eos:
        remember_sequence = decoder_input
    else:
        # the reply was cut off (or max_len is 0): close it with <eos> for the memory write only
        remember_token = torch.LongTensor([[outfield.vocab.stoi['<eos>']]])
        remember_sequence = torch.cat((decoder_input, remember_token), dim=1)
    _store_reply_in_memory(model, remember_sequence, encoding, input_mask, opt)

    # [0] because we are assuming batch size of 1
    de_str = ' '.join([outfield.vocab.itos[tok] for tok in decoder_input[0].tolist()])
    return decoder_input, de_str, logprobs

In [25]:
# Sample one reply; besides returning the tokens, the call also replaces
# chloe.current_memory with the memory updated from this exchange.
decoder_input, de_str, logprobs = talk2model_MemRL("how ?", chloe, opt, infield, outfield)
print(de_str)

<sos> meowci beaucoup ! <eos>


Next we need to train the memory. How do we do this? We need to talk to the model and allow it to accumulate at least one cycle of conversation, then teach it to respond correctly given the previous listen-reply exchange.

In [6]:
# Toy dialogue for training the memory: the correct reply to each "what is my name?"
# depends on the name given in the exchange immediately before it, so the order matters.
conversation_list = [
    {"listen":"my name is fluffy", "reply":"hello fluffy!"},
    {"listen":"what is my name?", "reply":"its fluffy silly"},
    {"listen":"my name is snuggles", "reply":"hello snuggles!"},
    {"listen":"what is my name?", "reply":"its snuggles silly"},
                    ]

In [40]:
def convo_trainer(conversation_list, model, options):
    '''
    Teacher-force the model on an ordered list of listen/reply exchanges while
    carrying its memory from one exchange to the next.

    input:
        conversation_list is a non-empty list of dicts with "listen" and "reply" strings
        model is a model exposing encoder, mem_encoder, decoder, out, rmc and current_memory
        options is an options object providing epochs, lr, trg_pad and save_path
    output:
        the same model object, trained in place

    Side effects: model.current_memory is overwritten after every exchange, one
    line is printed per epoch, and the model's state_dict is written to
    options.save_path each time the mean epoch loss improves.

    Note: the vocabularies are read from the notebook globals infield and outfield.
    '''
    # optimise the model and learning rate that were passed in, not notebook globals
    optimizer = torch.optim.Adam(model.parameters(), lr=options.lr, betas=(0.9, 0.98), eps=1e-9)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=5)

    sos_tok = torch.LongTensor([[outfield.vocab.stoi['<sos>']]])
    eos_tok = torch.LongTensor([[outfield.vocab.stoi['<eos>']]])

    model.train()
    start = time.time()
    best_loss = float('inf')  # so the first epoch is always checkpointed
    for epoch in range(options.epochs):
        total_loss = 0
        for exchange in conversation_list:
            listen_sequence = string2tensor(exchange["listen"], infield)
            # replies are decoder inputs/targets, so they are indexed with the OUTPUT vocabulary
            reply_sequence = string2tensor(exchange["reply"], outfield)
            decoder_input = torch.cat((sos_tok, reply_sequence, eos_tok), dim=1)
            decoder_target = torch.cat((reply_sequence, eos_tok), dim=1).contiguous().view(-1)
            src_mask, trg_mask = create_masks(listen_sequence, decoder_input, options)

            encoding = model.encoder(listen_sequence, src_mask)  # use the encoder to re-represent the input
            # mix the encoding with the memory left by the previous exchange
            encoding, _ = model.mem_encoder(encoding, model.current_memory, model.current_memory)
            decoding = model.decoder(decoder_input, encoding, src_mask, trg_mask)

            # the decoder output at the trailing <eos> position summarises this exchange
            memory_vector = decoding[:, -1, :]
            model.current_memory = model.rmc.update_memory(memory_vector, model.current_memory)

            # every position except the trailing <eos> predicts the next target token
            decoder_logits = model.out(decoding[:, :-1, :])
            decoder_output = decoder_logits.view(-1, decoder_logits.size(-1))

            optimizer.zero_grad()
            batch_loss = F.cross_entropy(decoder_output, decoder_target,
                                         ignore_index=options.trg_pad)
            # NOTE(review): retain_graph keeps this step's graph alive because the memory
            # written above is still attached to it and feeds the next exchange's loss.
            # The memory is never detached or reset, so the graph grows across exchanges
            # and epochs, and later backward passes walk through parameters that
            # optimizer.step() has already changed in place. Consider summing the loss
            # over a conversation, stepping once, then detaching/resetting the memory.
            batch_loss.backward(retain_graph=True)
            optimizer.step()
            total_loss += batch_loss.item()

        epoch_loss = total_loss / len(conversation_list)
        scheduler.step(epoch_loss)

        if epoch_loss < best_loss:
            best_loss = epoch_loss
            torch.save(model.state_dict(), options.save_path)
        print("%dm: epoch %d loss = %.3f" %((time.time() - start)//60, epoch, epoch_loss))

    return model

In [None]:
# Override the epoch count chosen when opt was created, then train on the toy
# conversation; convo_trainer returns the model object it was given.
opt.epochs = 20
chloe = convo_trainer(conversation_list, chloe, options=opt)

In [42]:
# Interactive chat: every turn is printed, and the loop ends after a turn in which
# the user says "bye chloe" or the model's reply contains "bye ttyl".
while True:
    tell_chloe = input("You > ")
    decoder_input, chloes_reply, logprobs = talk2model_MemRL(tell_chloe, chloe, opt, infield, outfield)
    print('Chloe > '+ chloes_reply + '\n')
    if "bye chloe" in tell_chloe or "bye ttyl" in chloes_reply:
        break

You > my name is fluffy
Chloe > <sos> <unk> meowci <unk> <eos>

You > hi
Chloe > <sos> thank am hi <eos>

You > how?
Chloe > <sos> <unk> am <unk> <eos>



KeyboardInterrupt: 