In [None]:
# Use this on your local computer
! pip install --upgrade pip
! pip install allennlp
! mkdir -p ~/data/workshop_data/cornell_movie_dialogs_corpus/cornell\ movie-dialogs\ corpus

In [None]:
# Use this on AWS
! conda update -n base -c defaults conda -y
! conda update --all -y
! sudo yum install gcc72-c++ -y
! pip install --no-deps jsonnet allennlp 
! mkdir -p ~/data/workshop_data/cornell_movie_dialogs_corpus/cornell\ movie-dialogs\ corpus

In [1]:
import pandas as pd
import spacy
import os
import unicodedata
import re
from typing import Dict, List, Iterator, Union, Callable
import random
random.seed(42)
import numpy as np
from datetime import datetime

import torch
from torch import nn
from torch.nn import functional as F
import torch.optim as optim
torch.manual_seed(42)
torch.cuda.manual_seed(42)


from allennlp.data.token_indexers import TokenIndexer, SingleIdTokenIndexer
from allennlp.data.dataset_readers import DatasetReader
from allennlp.data.tokenizers import Token
from allennlp.predictors import SentenceTaggerPredictor
from allennlp.data import Instance
from allennlp.data.iterators import BucketIterator, BasicIterator
from allennlp.data.fields import TextField, SequenceLabelField
from allennlp.data.vocabulary import Vocabulary
from allennlp.modules.text_field_embedders import TextFieldEmbedder, BasicTextFieldEmbedder
from allennlp.modules.token_embedders import Embedding
from allennlp.modules.seq2seq_encoders import Seq2SeqEncoder, PytorchSeq2SeqWrapper
from allennlp.nn.util import get_text_field_mask, sequence_cross_entropy_with_logits
from allennlp.models import Model

from allennlp.training.trainer import Trainer

MAX_LENGTH = 10


  return f(*args, **kwds)
  return f(*args, **kwds)


Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex.


In [10]:
home = os.path.expanduser("~")
data_dir = home + '/data/workshop_data/'
data = 'cornell_movie_dialogs_corpus/cornell movie-dialogs corpus/formatted_movie_lines.txt'
sp_data = 'southpark_full.csv'
log_dir = home + '/data/logs/'
import boto3
if not os.path.exists(log_dir):
    os.mkdir(log_dir)
if not os.path.exists(data_dir + data):
    try:
        import boto3
        s3 = boto3.resource('s3')
        s3.Bucket('workshopqcondata').download_file('data/' + data, data_dir + data)
        s3.Bucket('workshopqcondata').download_file('data/' + sp_data, data_dir + sp_data)
    except:
        print('Please download the data using the fetch notebooks')

In [4]:
class DialogueReader(DatasetReader):
    """
    DatasetReader for reading dialogue data from a csv
    """

    def __init__(self, token_indexers: Dict[str, TokenIndexer] = None) -> None:
        super().__init__(lazy=False)
        self.token_indexers = token_indexers or {"tokens": SingleIdTokenIndexer()}
        self.vocab = None

    def text_to_instance(self, query: List[Token], reply: List[Token] = None) -> Instance:
        query = TextField(query, self.token_indexers)
        fields = {"query": query}
        if reply is not None:
            reply = TextField(reply, self.token_indexers)
            fields["reply"] = reply

        return Instance(fields)
    
    @staticmethod
    def unicode_to_ascii(s):
        return ''.join(
            c for c in unicodedata.normalize('NFD', s)
            if unicodedata.category(c) != 'Mn')
    
    # Lowercase, trim, and remove non-letter characters
    @staticmethod
    def normalize_string(s):
        s = DialogueReader.unicode_to_ascii(s.lower().strip())
        s = re.sub(r"([.!?])", r" \1", s)
        s = s.replace(" . . .", " ...")
        s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
        s = re.sub(r"\s+", r" ", s).strip()
        return s

    def _read(self, file_path: str) -> Iterator[Instance]:
        df = pd.read_csv(file_path, header=None, names=['query', 'reply'], sep='\t')
        for _, row in df.iterrows():
            query = self.normalize_string(row['query']).split(' ')
            reply = self.normalize_string(row['reply']).split(' ')
            if len(query) >= MAX_LENGTH or len(reply) >= MAX_LENGTH:
                continue
            if self.vocab is not None:
                if np.any([self.vocab.get_token_index(x) == 1 for x in query]):
                    # print(f"skipped {query} {[self.vocab.get_token_index(x) for x in query]}")
                    continue
            query = [Token(word) for word in query] + [Token('#EOS#')]
            # print(self.vocab)
            if self.vocab is not None:
                if np.any([self.vocab.get_token_index(x) == 1 for x in reply]):
                    # print(f"skipped {reply} {[self.vocab.get_token_index(x) for x in reply]}")
                    continue
            reply = [Token(word) for word in reply] + [Token('#EOS#')]
            yield self.text_to_instance(query, reply)
 

class CartmanReader(DialogueReader):

    def _read(self, file_path: str) -> Iterator[Instance]:
        df = pd.read_csv(file_path, sep='\t')
        for i in range(len(df)):
            if (df['character'][i] != 'Cartman' or
                df['episode'][i] != df['episode'][i] or
                df['setting'][i] != df['setting'][i] or
                not pd.notnull(df['spoken'][i-1]) or
                not pd.notnull(df['spoken'][i])):
                    continue
            query = self.normalize_string(df['spoken'][i-1]).split(' ')
            reply = self.normalize_string(df['spoken'][i]).split(' ')
            if len(reply) < 3:
                continue
            if len(query) >= MAX_LENGTH or len(reply) >= MAX_LENGTH:
                continue
            if self.vocab is not None:
                if np.any([self.vocab.get_token_index(x) == 1 for x in query]):
                    # print(f"skipped {query} {[self.vocab.get_token_index(x) for x in query]}")
                    continue
            query = [Token(word) for word in query] + [Token('#EOS#')]
            # print(self.vocab)
            if self.vocab is not None:
                if np.any([self.vocab.get_token_index(x) == 1 for x in reply]):
                    # print(f"skipped {reply} {[self.vocab.get_token_index(x) for x in reply]}")
                    continue
            reply = [Token(word) for word in reply] + [Token('#EOS#')]
            yield self.text_to_instance(query, reply)

In [5]:
reader1 = DialogueReader()
dialogue_dataset1 = reader1.read(data)
reader2 = CartmanReader()
cartman_dataset1 = reader2.read(sp_data)

sos_token = '#SOS#'
vocab = Vocabulary.from_instances(dialogue_dataset1 + cartman_dataset1, min_count={'tokens': 2})
vocab.add_token_to_namespace(sos_token)

67210it [00:30, 2188.36it/s]
1576it [00:01, 920.06it/s]
100%|██████████| 68786/68786 [00:00<00:00, 162649.46it/s]


12103

In [6]:
reader = DialogueReader()
reader.vocab = vocab
dialogue_dataset = reader.read(data)

61340it [00:31, 1925.62it/s]


In [7]:
vocab.print_statistics()
print(vocab.get_vocab_size())



----Vocabulary Statistics----


Top 10 most frequent tokens in namespace 'tokens':
	Token: #EOS#		Frequency: 137572
	Token: .		Frequency: 85316
	Token: ?		Frequency: 46524
	Token: you		Frequency: 31360
	Token: i		Frequency: 23926
	Token: s		Frequency: 13499
	Token: what		Frequency: 13444
	Token: !		Frequency: 13254
	Token: it		Frequency: 12442
	Token: the		Frequency: 11704

Top 10 longest tokens in namespace 'tokens':
	Token: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa		length: 33	Frequency: 1
	Token: phreakphreakphreakphreakphreak		length: 30	Frequency: 1
	Token: mmmmmmmmmmmmmnnnnnnmmmmmmmm		length: 27	Frequency: 1
	Token: bebebudjuhbluhbluhbluhbluh		length: 26	Frequency: 1
	Token: bwolololololololololololo		length: 25	Frequency: 1
	Token: dudedudedudedudedudedude		length: 24	Frequency: 1
	Token: aaaaahhnnnhaaaaaannnhhh		length: 23	Frequency: 1
	Token: aaaahhhhnnnahhhnnnggnnn		length: 23	Frequency: 1
	Token: aaaaannnnnuuunnnuhhhhh		length: 22	Frequency: 1
	Token: rowrrowrrowrrowrrowr		length:

In [8]:
class EncoderRNN(nn.Module):
    def __init__(self, hidden_size, embedding: TextFieldEmbedder, n_layers=1, dropout=0):
        super(EncoderRNN, self).__init__()
        self.n_layers = n_layers
        self.hidden_size = hidden_size
        self.embedding = embedding

        # Initialize GRU; the input_size and hidden_size params are both set to 'hidden_size'
        #   because our input size is a word embedding with number of features == hidden_size
        self.gru = PytorchSeq2SeqWrapper(nn.GRU(
            hidden_size, hidden_size, n_layers,
            dropout=(0 if n_layers == 1 else dropout), bidirectional=True, batch_first=True))

    def forward(self, query: Dict[str, torch.Tensor]):
        # Convert word indexes to embeddings
        mask = get_text_field_mask(query)
        
        embeddings = self.embedding(query)
        
        encoder_out, final_states = self.gru(embeddings, mask)
        
        # Sum bidirectional GRU outputs
        outputs = encoder_out[:, :, :self.hidden_size] + encoder_out[:, : ,self.hidden_size:]
        
        # Return output and final hidden state
        return outputs, final_states
    
# Luong attention layer
class Attn(torch.nn.Module):
    def __init__(self, method, hidden_size):
        super(Attn, self).__init__()
        self.method = method
        if self.method not in ['dot', 'general', 'concat']:
            raise ValueError(self.method, "is not an appropriate attention method.")
        self.hidden_size = hidden_size
        if self.method == 'general':
            self.attn = torch.nn.Linear(self.hidden_size, hidden_size)
        elif self.method == 'concat':
            self.attn = torch.nn.Linear(self.hidden_size * 2, hidden_size)
            self.v = torch.nn.Parameter(torch.FloatTensor(hidden_size))

    def dot_score(self, hidden, encoder_output):
        return torch.sum(hidden * encoder_output, dim=2)

    def general_score(self, hidden, encoder_output):
        energy = self.attn(encoder_output)
        return torch.sum(hidden * energy, dim=2)

    def concat_score(self, hidden, encoder_output):
        energy = self.attn(torch.cat((hidden.expand(-1, encoder_output.size(1), -1), encoder_output), 2)).tanh()
        return torch.sum(self.v * energy, dim=2)

    def forward(self, hidden, encoder_outputs):
        # Calculate the attention weights (energies) based on the given method
        if self.method == 'general':
            attn_energies = self.general_score(hidden, encoder_outputs)
        elif self.method == 'concat':
            attn_energies = self.concat_score(hidden, encoder_outputs)
        elif self.method == 'dot':
            attn_energies = self.dot_score(hidden, encoder_outputs)
        # Transpose max_length and batch_size dimensions
        # attn_energies = attn_energies.t()
        # print(attn_energies.shape)

        # Return the softmax normalized probability scores (with added dimension)
        return F.softmax(attn_energies, dim=1).unsqueeze(2)
    
class LuongAttnDecoderRNN(nn.Module):
    def __init__(self, attn_model, embedding: TextFieldEmbedder, hidden_size, output_size, n_layers=1, dropout=0.1):
        super(LuongAttnDecoderRNN, self).__init__()

        # Keep for reference
        self.attn_model = attn_model
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.dropout = dropout

        # Define layers
        self.embedding = embedding
        self.embedding_dropout = nn.Dropout(dropout)
        self.gru = nn.GRU(
            hidden_size, hidden_size, n_layers, 
            dropout=(0 if n_layers == 1 else dropout), batch_first=True)
        self.concat = nn.Linear(hidden_size * 2, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)

        self.attn = Attn(attn_model, hidden_size)

    def forward(self, input_step, encoder_outputs, last_hidden=None):
        # Note: we run this one step (word) at a time
        # Get embedding of current input word
        embedded = self.embedding({'tokens': input_step})
        embedded = self.embedding_dropout(embedded)
        # Forward through unidirectional GRU
        rnn_output, hidden = self.gru(embedded, last_hidden)
        # Calculate attention weights from the current GRU output
        attn_weights = self.attn(rnn_output, encoder_outputs)#.squeeze(2)
        # Multiply attention weights to encoder outputs to get new "weighted sum" context vector
        context = torch.sum(attn_weights * encoder_outputs, dim=1)
        # Concatenate weighted context vector and GRU output using Luong eq. 5
        rnn_output = rnn_output.squeeze(1)
        context = context.squeeze(1)
        concat_input = torch.cat((rnn_output, context), 1)
        concat_output = torch.tanh(self.concat(concat_input))
        # Predict next word using Luong eq. 6
        output = self.out(concat_output)
        # Return output and final hidden state
        return output, hidden

In [9]:
class Chatbot(Model):
    def __init__(self, encoder: EncoderRNN, decoder: LuongAttnDecoderRNN,
                 vocab: Vocabulary, sos_token: Token, batch_size: int,
                 max_target_len: int, teacher_forcing_ratio):
        super().__init__(vocab)
        self.encoder = encoder
        self.decoder = decoder
        self.vocab = vocab
        self.sos_token = sos_token
        self.max_target_len = max_target_len
        self.teacher_forcing_ratio = teacher_forcing_ratio
        
    @property
    def device(self):
        return next(self.parameters()).device
      
    @staticmethod
    def mask_nll_loss(inp, target, mask):
        n_total = mask.sum()
        print(inp.shape)
        print(target.shape)
        log_likelihood = nn.NLLLoss(reduce=False)(inp.permute(0, 2, 1), target)
        loss = log_likelihood.masked_select(mask.byte()).mean()
        return loss# , n_total.item()

    def forward(self, query: Dict[str, torch.Tensor], reply: Dict[str, torch.Tensor] = None):
        if reply is None:
            print(query)
        
        max_target_len = self.max_target_len if reply is None else reply['tokens'].shape[1]
        output = {}
        encoder_outputs, final_states = encoder(query)
        mask = get_text_field_mask(query)
        selector = mask.sum(dim=1)-1
        batch_size = query['tokens'].shape[0]
        decoder_hidden = None #final_states[:decoder.n_layers] # [0,2]] + final_states[[1,3]]
        # for i in range(batch_size):
        #     decoder_hidden.append(encoder_outputs[i, selector[i]])
        # print(decoder_hidden[0].shape)
        # decoder_hidden = torch.stack(decoder_hidden, dim=0)
        decoder_input = torch.LongTensor([[self.vocab.get_token_index(self.sos_token)] for _ in range(batch_size)])
        decoder_input = decoder_input.to(self.device)
        # Determine if we are using teacher forcing this iteration
        use_teacher_forcing = True if random.random() < self.teacher_forcing_ratio else False
        ret = []
        # Forward batch of sequences through decoder one time step at a time
        if use_teacher_forcing and reply is not None:
            loss = 0
            for t in range(max_target_len):
                decoder_output, decoder_hidden = self.decoder(
                    decoder_input, encoder_outputs, decoder_hidden)
                # Teacher forcing: next input is current target
                decoder_input = reply['tokens'][:, t].unsqueeze(1)
                ret.append(decoder_output)# torch.LongTensor([topi[i][0] for i in range(batch_size)]))
                # Calculate and accumulate loss
        else:
            for t in range(max_target_len):
                decoder_output, decoder_hidden = self.decoder(
                    decoder_input, encoder_outputs, decoder_hidden)
                # print(decoder_hidden.shape, encoder_outputs.shape, decoder_input.shape)
                # No teacher forcing: next input is decoder's own current output
                # _, topi = decoder_output.topk(1)
                decoder_input = decoder_output.argmax(dim=1).unsqueeze(1)
                # print(decoder_output.shape, decoder_input.shape)
                # print(decoder_input)
                # decoder_input = torch.LongTensor([[topi[i][0]] for i in range(batch_size)])
                # print(decoder_output.shape, decoder_input.shape)
                # print(decoder_input)
                ret.append(decoder_output)
                decoder_input = decoder_input.to(self.device)
        ret = torch.stack(ret, dim=1)
        # print(ret)
        # print(reply['tokens'])
        if reply is not None:
            # print(ret.shape, reply['tokens'].shape)
            mask = get_text_field_mask(reply)
            # mask = mask.float()*((reply['tokens'] == self.vocab.get_token_index('#EOS#') |
            #                     (reply['tokens'] == self.vocab.get_token_index('.')).float() * -.75 + 1)
            loss = sequence_cross_entropy_with_logits(ret, reply['tokens'], mask)
            output["loss"] = loss
            # print(f'Averaged loss {loss}')
            #mask = mask.float()        
            #loss = sequence_cross_entropy_with_logits(ret, reply['tokens'], mask, average=None)
            #output["loss"] = (loss*mask.sum(1) / mask.sum(1).mean()).mean()
            # print(f'Calculated loss {output["loss"]}')
        output["reply"] = ret.argmax(dim=2)
        return output
        

In [13]:
# Configure models
model_name = 'cb_model'
attn_model = 'dot'
#attn_model = 'general'
#attn_model = 'concat'
hidden_size = 500
encoder_n_layers = 2
decoder_n_layers = 2
dropout = 0.1
batch_size = 64
# Configure training/optimization
clip = 50.0
teacher_forcing_ratio = 1.0
learning_rate = 0.0001
decoder_learning_ratio = 5.0
n_epochs = 10
print_every = 1
save_every = 500
device = 'cuda:0'

print('Building encoder and decoder ...')
# Initialize word embeddings


token_embedding = Embedding(num_embeddings=vocab.get_vocab_size('tokens'),
                            embedding_dim=hidden_size)
word_embeddings = BasicTextFieldEmbedder({"tokens": token_embedding})

# Initialize encoder & decoder models
encoder = EncoderRNN(hidden_size,word_embeddings, encoder_n_layers, dropout)
decoder = LuongAttnDecoderRNN(attn_model, word_embeddings, hidden_size,
                              vocab.get_vocab_size('tokens'), decoder_n_layers, dropout)
# Use appropriate device
print('Models built and ready to go!')
model = Chatbot(encoder, decoder, vocab, sos_token, batch_size, MAX_LENGTH, teacher_forcing_ratio)
model.to(device)
optimizer = optim.Adam([
                {'params': model.encoder.parameters()},
                {'params': [param for name, param in model.decoder.named_parameters() 
                            if 'embedding' not in name], 'lr': 5*learning_rate}
                        ], lr=learning_rate)

iterator = BucketIterator(batch_size=batch_size, sorting_keys=[("query", "num_tokens")])

iterator.index_with(vocab)
log_path = log_dir + datetime.now().ctime()
trainer = Trainer(model=model,
                  optimizer=optimizer,
                  iterator=iterator,
                  train_dataset=dialogue_dataset,
                  grad_clipping=clip,
                  patience=1,
                  num_epochs=n_epochs,
                  cuda_device=int(device[-1]),
                  serialization_dir = log_path,
                  histogram_interval=1000)

Building encoder and decoder ...
Models built and ready to go!


In [14]:
trainer.train()

loss: 4.3031 ||: 100%|██████████| 959/959 [00:46<00:00, 22.45it/s]
loss: 3.6982 ||: 100%|██████████| 959/959 [00:47<00:00, 20.25it/s]
loss: 3.4865 ||: 100%|██████████| 959/959 [00:47<00:00, 20.34it/s]
loss: 3.3300 ||: 100%|██████████| 959/959 [00:47<00:00, 20.19it/s]
loss: 3.1960 ||: 100%|██████████| 959/959 [00:47<00:00, 20.09it/s]
loss: 3.0715 ||: 100%|██████████| 959/959 [00:47<00:00, 20.23it/s]
loss: 2.9571 ||: 100%|██████████| 959/959 [00:46<00:00, 21.90it/s]
loss: 2.8460 ||: 100%|██████████| 959/959 [00:48<00:00, 19.94it/s]
loss: 2.7396 ||: 100%|██████████| 959/959 [00:46<00:00, 20.57it/s]
loss: 2.6377 ||: 100%|██████████| 959/959 [00:41<00:00, 23.33it/s]


{'best_epoch': 9,
 'peak_cpu_memory_MB': 3913.752,
 'peak_gpu_0_memory_MB': 1586,
 'peak_gpu_1_memory_MB': 490,
 'training_duration': '00:07:48',
 'training_start_epoch': 0,
 'training_epochs': 9,
 'epoch': 9,
 'training_loss': 2.6377186810013153,
 'training_cpu_memory_MB': 3913.752,
 'training_gpu_0_memory_MB': 1586,
 'training_gpu_1_memory_MB': 490}

In [15]:
predictor = SentenceTaggerPredictor(model, dataset_reader=reader)
vec = predictor.predict("where are you from?")['reply']
print(len(vec))
print([vocab.get_token_from_index(x) for x in vec])
vec = predictor.predict("what up?")['reply']
print(len(vec))
print([vocab.get_token_from_index(x) for x in vec])
vec = predictor.predict("I hate you!")['reply']
print(len(vec))
print([vocab.get_token_from_index(x) for x in vec])
vec = predictor.predict("Are you mad?")['reply']
print(len(vec))
print([vocab.get_token_from_index(x) for x in vec])

{'tokens': tensor([[ 46,  21,   5, 106,   4]], device='cuda:0')}
10
['i', 'm', 'going', 'to', 'california', '.', '#EOS#', '.', '#EOS#', '.']
{'tokens': tensor([[ 8, 69,  4]], device='cuda:0')}
10
['i', 'm', 'a', 'bowler', '.', '#EOS#', '.', '#EOS#', '.', '#EOS#']
{'tokens': tensor([[  1, 395,   5,   9]], device='cuda:0')}
10
['i', 'm', 'not', 'asking', 'you', 'to', '.', '#EOS#', '.', '#EOS#']
{'tokens': tensor([[  1,   5, 908,   4]], device='cuda:0')}
10
['i', 'm', 'afraid', 'so', '.', '#EOS#', '.', '#EOS#', '.', '#EOS#']


In [16]:
# Here's how to save the model.
with open("/tmp/dialogue_bot.th", 'wb') as f:
    torch.save(model.state_dict(), f)
vocab.save_to_files("/tmp/vocabulary")

In [None]:
with open("/tmp/dialogue_bot.th", 'rb') as f:
    model.load_state_dict(torch.load(f)

In [17]:
reader = CartmanReader()
reader.vocab = vocab
cartman_dataset = reader.read(sp_data)

1188it [00:01, 683.22it/s]


In [18]:
print(cartman_dataset[1000])

Instance with fields:
 	 query: TextField of length 5 with text: 
 		[like, what, ?, !, #EOS#]
 		and TokenIndexers : {'tokens': 'SingleIdTokenIndexer'} 
 	 reply: TextField of length 10 with text: 
 		[i, want, a, part, in, the, new, movies, !, #EOS#]
 		and TokenIndexers : {'tokens': 'SingleIdTokenIndexer'} 



In [19]:
trainer = Trainer(model=model,
                  optimizer=optimizer,
                  iterator=iterator,
                  train_dataset=cartman_dataset,
                  grad_clipping=clip,
                  patience=1,
                  num_epochs=12,
                  cuda_device=int(device[-1]),
                  serialization_dir=log_path,
                  histogram_interval=1000)

In [20]:
trainer.train()

loss: 4.5654 ||: 100%|██████████| 19/19 [00:00<00:00, 19.08it/s]
loss: 3.5895 ||: 100%|██████████| 19/19 [00:00<00:00, 21.93it/s]


{'best_epoch': 11,
 'peak_cpu_memory_MB': 4204.436,
 'peak_gpu_0_memory_MB': 1590,
 'peak_gpu_1_memory_MB': 490,
 'training_duration': '00:00:02',
 'training_start_epoch': 10,
 'training_epochs': 1,
 'epoch': 11,
 'training_loss': 3.58947827941493,
 'training_cpu_memory_MB': 4204.436,
 'training_gpu_0_memory_MB': 1590,
 'training_gpu_1_memory_MB': 490}

In [None]:
# Here's how to save the model.
with open("/tmp/cartman_bot.th", 'wb') as f:
    torch.save(model.state_dict(), f)

In [21]:
predictor = SentenceTaggerPredictor(model, dataset_reader=reader)
vec = predictor.predict("where are you from?")['reply']
print(len(vec))
print([vocab.get_token_from_index(x) for x in vec])
vec = predictor.predict("what up?")['reply']
print(len(vec))
print([vocab.get_token_from_index(x) for x in vec])
vec = predictor.predict("Kyle is so nice.")['reply']
print(len(vec))
print([vocab.get_token_from_index(x) for x in vec])
vec = predictor.predict("Kenny?")['reply']
print(len(vec))
print([vocab.get_token_from_index(x) for x in vec])
vec = predictor.predict("Aw shit, I missed it!")['reply']
print(len(vec))
print([vocab.get_token_from_index(x) for x in vec])

{'tokens': tensor([[ 46,  21,   5, 106,   4]], device='cuda:0')}
10
['i', 'm', 'going', 'to', 'be', 'ashore', 'with', 'you', '.', '#EOS#']
{'tokens': tensor([[ 8, 69,  4]], device='cuda:0')}
10
['i', 'm', 'dying', '.', '#EOS#', 'off', '.', '#EOS#', '.', '#EOS#']
{'tokens': tensor([[  1,  18,  50, 178,   3]], device='cuda:0')}
10
['yeah', 'that', 's', 'pretty', 'nice', '.', '#EOS#', '.', '#EOS#', '.']
{'tokens': tensor([[1, 4]], device='cuda:0')}
10
['yeah', '.', '#EOS#', 'off', '.', '#EOS#', '.', '#EOS#', '.', '#EOS#']
{'tokens': tensor([[  1, 152,   1,   1, 594,  10,   9]], device='cuda:0')}
10
['i', 'm', 'gonna', 'be', 'awright', '.', '#EOS#', '.', '#EOS#', '.']
