In [1]:
#Pre-Processing
import os
import re
import torch
import random
import itertools

#Model
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

import numpy as np

# For visualising metrics
from visdom import Visdom

# For visualising gradients plot
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D

import copy
import math
import time

In [2]:
device=torch.device("cuda" if torch.cuda.is_available() else "cpu")

print("The device found: "+str(device))

The device found: cpu


In [3]:
class VisdomLinePlotter(object):
    """Plots to Visdom"""
    
    def __init__(self, env_name='main'):
        self.viz = Visdom()
        self.env = env_name
        self.plots = {}
    def plot(self, var_name, split_name, title_name, x, y):
        if var_name not in self.plots:
            self.plots[var_name] = self.viz.line(X=np.array([x,x]), Y=np.array([y,y]), env=self.env, opts=dict(
                legend=[split_name],
                title=title_name,
                xlabel='Epochs',
                ylabel=var_name
            ))
        else:
            self.viz.line(X=np.array([x]), Y=np.array([y]), env=self.env, win=self.plots[var_name], name=split_name, update = 'append')

In [4]:
def plot_grad_flow(named_parameters):
    """
        Plotting gradient flow across various layers
        Thanks to: https://discuss.pytorch.org/t/check-gradient-flow-in-network/15063/2
    """   
    ave_grads = []
    layers = []
    for n, p in named_parameters:
        if(p.requires_grad) and ("bias" not in n):
            layers.append(n)
            ave_grads.append(p.grad.abs().mean())
    plt.plot(ave_grads, alpha=0.3, color="b")
    plt.hlines(0, 0, len(ave_grads)+1, linewidth=1, color="k" )
    plt.xticks(range(0,len(ave_grads), 1), layers, rotation="vertical")
    plt.xlim(xmin=0, xmax=len(ave_grads))
    plt.xlabel("Layers")
    plt.ylabel("average gradient")
    plt.title("Gradient flow")
    plt.grid(True)

# Preprocessing

In [5]:
path='C:\\Users\\deepa\\Conversational Agents\\Datasets'
dataset='cornell movie-dialogs corpus'

data_folder=os.path.join(path,dataset)

print("The final data corpus folder: "+str(data_folder))

The final data corpus folder: C:\Users\deepa\Conversational Agents\Datasets\cornell movie-dialogs corpus


In [6]:
def get_lines_conversations():
    """
    Loads movie lines and conversations from the dataset.
    
    data_folder: Destination where conversations and lines are stored.
    
    movie_lines: Consist of movie lines as given by the dataset.
    movie_conversations: Consist of movie conversations as given by the dataset.
    
    """
    f=open(os.path.join(data_folder,'movie_lines.txt'),'r')
    movie_lines=f.read().splitlines()
    f.close()
    
    f=open(os.path.join(data_folder,'movie_conversations.txt'),'r')
    movie_conversations=f.read().splitlines()
    f.close()
    
    return movie_lines,movie_conversations


In [7]:
t1=time.time()
print("Extracting movie lines and movie conversations...")
movie_lines,movie_conversations=get_lines_conversations()

print("Number of distinct lines: "+str(len(movie_lines)))
print("Number of conversations: "+str(len(movie_conversations)))
print("Average Number of lines per conversations: "+str(len(movie_lines)/len(movie_conversations)))

print(movie_lines[0])
print(movie_conversations[0])

print("Extracting took place in: "+str(time.time()-t1))

Extracting movie lines and movie conversations...
Number of distinct lines: 304713
Number of conversations: 83097
Average Number of lines per conversations: 3.6669554857576085
L1045 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ They do not!
u0 +++$+++ u2 +++$+++ m0 +++$+++ ['L194', 'L195', 'L196', 'L197']
Extracting took place in: 0.2782161235809326


In [8]:
def loadLines(movie_lines,fields):
    lines={}
    for line in movie_lines:
        values=line.split(" +++$+++ ")
        
        lineVals={}
        
#         print("values"+str(len(values)))
#         print("fields"+str(len(fields)))
              
        for i,field in enumerate(fields):
            lineVals[field]=values[i]
        
        lines[lineVals['lineID']]=lineVals
    
    return lines

def loadConversations(movie_conversations,lines,fields):
    conversations=[]
    
    for convo in movie_conversations:
        values=convo.split(" +++$+++ ")
        conVals={}
       
        for i,field in enumerate(fields):
            conVals[field]=values[i]
        
        lineIDs=eval(conVals["utteranceIDs"])
        
        conVals["lines"]=[]
        
        for lineID in lineIDs:
            conVals["lines"].append(lines[lineID])
        conversations.append(conVals)
        
    return conversations

def sentencePairs(conversations):
    qr_pairs=[]
    
    for conversation in conversations:
        for i in range(len(conversation["lines"])-1):
            query=conversation["lines"][i]["text"].strip()
            response=conversation["lines"][i+1]["text"].strip()
            
            if query and response:
                qr_pairs.append([query,response])
        
    return qr_pairs

In [9]:
t1=time.time()
print("Separating meaningfull information for our model...")

lines={}
conversations=[]
qr_pairs=[]

movie_lines_fields=["lineID","characterID","movieID","character","text"]
movie_convo_fields=["charcaterID","character2ID","movieID","utteranceIDs"]

lines=loadLines(movie_lines,movie_lines_fields)
conversations=loadConversations(movie_conversations,lines,movie_convo_fields)
qr_pairs=sentencePairs(conversations)

print("The number of query-response pairs are: "+str(len(qr_pairs)))
print("Separation took place in: "+str(time.time()-t1))


Separating meaningfull information for our model...
The number of query-response pairs are: 221282
Separation took place in: 2.646280527114868


In [10]:
PAD_Token=0
START_Token=1
END_Token=2

class Vocabulary:
    def __init__(self):
        self.trimmed=False
        self.word2count={}
        self.index2word={PAD_Token:"PAD",START_Token:"SOS",END_Token:"EOS"}
        self.word2index={"PAD":PAD_Token,"SOS":START_Token,"EOS":END_Token}
        self.num_words=3
        
    def addSentence(self,sentence):
        for word in sentence.split(" "):
            self.addWord(word)
    def addWord(self,word):
        if word not in self.word2index:
            self.word2index[word]=self.num_words
            self.index2word[self.num_words]=word
            self.word2count[word]=1
            self.num_words=self.num_words+1
        else:
            self.word2count[word]+=1
            
    def trim(self,min_count):
        
        if self.trimmed:
            return
        self.trimmed=True
        
        keep_words=[]
        
        for word,freq in self.word2count.items():
            if freq>=min_count:
                keep_words.append(word)
        
        self.word2count={}
        self.index2word={PAD_Token:"PAD",START_Token:"SOS",END_Token:"EOS"}
        self.word2index={"PAD":PAD_Token,"SOS":START_Token,"EOS":END_Token}
        self.num_words=3
        
        for word in keep_words:
            self.addWord(word)

In [11]:
Max_Length=10

def normalizeString(s):
    s=s.lower().strip()
    s=re.sub(r"([.!?])", r" \1", s)
    s=re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    s=re.sub(r"\s+", r" ", s).strip()
    return s

def readVocs(qr_pairs):
    
    for qr_pair in qr_pairs:
        qr_pair[0]=normalizeString(qr_pair[0])
        qr_pair[1]=normalizeString(qr_pair[1])
    
    voc=Vocabulary()
    return voc,qr_pairs

def filterPair(pair):
    return len(pair[0].split(" "))<Max_Length and len(pair[1].split(" "))<Max_Length

def filterPairs(qr_pairs):
    return [pair for pair in qr_pairs if filterPair(pair)]

def prepareDataset(qr_pairs):
    voc, qr_pairs=readVocs(qr_pairs)
    qr_pairs=filterPairs(qr_pairs)
       
    for pair in qr_pairs:
        voc.addSentence(pair[0])
        voc.addSentence(pair[1])
#     print("Number"+str(voc.num_words))
    return voc,qr_pairs

t1=time.time()
print("Preparing dataset and corresponding vocabulary...")
voc, pairs=prepareDataset(qr_pairs)
print("Preparation took place in: "+str(time.time()-t1))

Preparing dataset and corresponding vocabulary...
Preparation took place in: 9.411579608917236


In [12]:
Min_Count=3

def trimRareWords(voc,qr_pairs):
    
    voc.trim(Min_Count)
    keep_pairs=[]
    
    for pair in qr_pairs:
        input_sentence=pair[0]
        output_sentence=pair[1]
        
        keep_input=True
        keep_output=True
        
        for word in input_sentence.split(" "):
            if word not in voc.word2index:
                keep_input=False
                break
        
        for word in output_sentence.split(" "):
            if word not in voc.word2index:
                keep_output=False
                break
                
        if keep_input and keep_output:
            keep_pairs.append(pair)
            
    return keep_pairs

t1=time.time()
print("Trimming rare words from vocabulary and dataset..")

pairs=trimRareWords(voc,pairs)

print("Trimming took place in: "+str(time.time()-t1))


Trimming rare words from vocabulary and dataset..
Trimming took place in: 0.30527830123901367


In [13]:
def indexesFromSentence(voc,sentence):
    tokenised_sentence=[]
    tokenised_sentence.append(START_Token)
    
    for word in sentence.split(" "):
        tokenised_sentence.append(voc.word2index[word])
        
    tokenised_sentence.append(END_Token)
    
    assert len(tokenised_sentence)<=Max_Length+2
    for _ in range(Max_Length+2-len(tokenised_sentence)):
        tokenised_sentence.append(PAD_Token)
        
    return tokenised_sentence

def binaryMatrix(l,value=PAD_Token):
    m=[]
    for i,seq in enumerate(l):
        m.append([])
        for token in seq:
            if token==value:
                m[i].append(0)
            else:
                m[i].append(1)
        
    return m

def inputVar(voc,l):
    
    indexes_batch=[indexesFromSentence(voc,sentence) for sentence in l]
    input_lengths=torch.tensor([len(index) for index in indexes_batch])
    padVar=torch.LongTensor(indexes_batch)
    return input_lengths,padVar

def outputVar(voc,l):
    indexes_batch=[indexesFromSentence(voc,sentence) for sentence in l]
    max_target_len=torch.tensor([len(index) for index in indexes_batch])
    mask=binaryMatrix(indexes_batch)
    mask=torch.ByteTensor(mask)
    padVar=torch.LongTensor(indexes_batch)
    return max_target_len, mask, padVar

def batch2TrainData(voc,pair_batch):
    #sort function see 
    input_batch=[]
    output_batch=[]

    for pair in pair_batch:
        input_batch.append(pair[0])
        output_batch.append(pair[1])
                                  
    
    input_lengths,tokenised_input=inputVar(voc,input_batch)
    max_out_length,mask,tokenised_output=outputVar(voc,output_batch)
    return input_lengths,tokenised_input,max_out_length,mask,tokenised_output



In [14]:
print("Number of query-response pairs after all the preprocessing: "+str(len(pairs)))

#Sample batch
batch=[random.choice(pairs) for _ in range(5)]
input_lengths,tokenised_input,max_out_length,mask,tokenised_output=batch2TrainData(voc,batch)

print("Input length: "+str(input_lengths)+" Size: "+str(input_lengths.shape))
print("-"*80)
print("Tokenised Input: "+str(tokenised_input)+" Size: "+str(tokenised_input.shape))
print("-"*80)
print("Max out length: "+str(max_out_length)+" Size: "+str(max_out_length.shape))
print("-"*80)
print("Mask: "+str(mask)+" Size: "+str(mask.shape))
print("-"*80)
print("Tokenised Output: "+str(tokenised_output)+" Size: "+str(tokenised_output.shape))
print("-"*80)

Number of query-response pairs after all the preprocessing: 53113
Input length: tensor([12, 12, 12, 12, 12]) Size: torch.Size([5])
--------------------------------------------------------------------------------
Tokenised Input: tensor([[   1, 2762,    4,   25,  200,   12, 2209,  140,    4,    2,    0,    0],
        [   1,   76,   37,   36, 6525,   66,    2,    0,    0,    0,    0,    0],
        [   1,  122,  101,  250,  518,    7,  129,    6,    2,    0,    0,    0],
        [   1,  401,  177,  111,   66,    2,    0,    0,    0,    0,    0,    0],
        [   1, 2324,    4,    4,    4,  167,   64,    4,    2,    0,    0,    0]]) Size: torch.Size([5, 12])
--------------------------------------------------------------------------------
Max out length: tensor([12, 12, 12, 12, 12]) Size: torch.Size([5])
--------------------------------------------------------------------------------
Mask: tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0],
        [1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0],
       

# Model 1


In [15]:
class EncoderDecoder(nn.Module):
    """
    A standard Encoder-Decoder architecture. Base for this and many 
    other models.
    """
    def __init__(self, encoder, decoder, src_embed, tgt_embed, generator):
        super(EncoderDecoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.src_embed = src_embed
        self.tgt_embed = tgt_embed
        self.generator = generator
        
    def forward(self, src, tgt, src_mask, tgt_mask):
        "Take in and process masked src and target sequences."
        return self.decode(self.encode(src, src_mask), src_mask,
                            tgt, tgt_mask)
    
    def encode(self, src, src_mask):
        return self.encoder(self.src_embed(src), src_mask)
    
    def decode(self, memory, src_mask, tgt, tgt_mask):
        return self.decoder(self.tgt_embed(tgt), memory, src_mask, tgt_mask)

In [16]:
class Generator(nn.Module):
    "Define standard linear + softmax generation step."
    def __init__(self, d_model, vocab):
        super(Generator, self).__init__()
        self.proj = nn.Linear(d_model, vocab)

    def forward(self, x):
        return F.log_softmax(self.proj(x), dim=-1)

In [17]:
def clones(module, N):
    "Produce N identical layers."
    return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])

In [18]:
class Encoder(nn.Module):
    "Core encoder is a stack of N layers"
    def __init__(self, layer, N):
        super(Encoder, self).__init__()
        self.layers = clones(layer, N)
        self.norm = LayerNorm(layer.size)
        
    def forward(self, x, mask):
        "Pass the input (and mask) through each layer in turn."
        for layer in self.layers:
            x = layer(x, mask)
        return self.norm(x)

In [19]:
class LayerNorm(nn.Module):
    "Construct a layernorm module (See citation for details)."
    def __init__(self, features, eps=1e-6):
        super(LayerNorm, self).__init__()
        self.a_2 = nn.Parameter(torch.ones(features))
        self.b_2 = nn.Parameter(torch.zeros(features))
        self.eps = eps

    def forward(self, x):
        mean = x.mean(-1, keepdim=True)
        std = x.std(-1, keepdim=True)
        return self.a_2 * (x - mean) / (std + self.eps) + self.b_2

In [20]:
class SublayerConnection(nn.Module):
    """
    A residual connection followed by a layer norm.
    Note for code simplicity the norm is first as opposed to last.
    """
    def __init__(self, size, dropout):
        super(SublayerConnection, self).__init__()
        self.norm = LayerNorm(size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, sublayer):
        "Apply residual connection to any sublayer with the same size."
        return x + self.dropout(sublayer(self.norm(x)))

In [21]:
class EncoderLayer(nn.Module):
    "Encoder is made up of self-attn and feed forward (defined below)"
    def __init__(self, size, self_attn, feed_forward, dropout):
        super(EncoderLayer, self).__init__()
        self.self_attn = self_attn
        self.feed_forward = feed_forward
        self.sublayer = clones(SublayerConnection(size, dropout), 2)
        self.size = size

    def forward(self, x, mask):
        "Follow Figure 1 (left) for connections."
        x = self.sublayer[0](x, lambda x: self.self_attn(x, x, x, mask))
        return self.sublayer[1](x, self.feed_forward)

In [22]:
class Decoder(nn.Module):
    "Generic N layer decoder with masking."
    def __init__(self, layer, N):
        super(Decoder, self).__init__()
        self.layers = clones(layer, N)
        self.norm = LayerNorm(layer.size)
        
    def forward(self, x, memory, src_mask, tgt_mask):
        for layer in self.layers:
            x = layer(x, memory, src_mask, tgt_mask)
        return self.norm(x)

In [23]:
class DecoderLayer(nn.Module):
    "Decoder is made of self-attn, src-attn, and feed forward (defined below)"
    def __init__(self, size, self_attn, src_attn, feed_forward, dropout):
        super(DecoderLayer, self).__init__()
        self.size = size
        self.self_attn = self_attn
        self.src_attn = src_attn
        self.feed_forward = feed_forward
        self.sublayer = clones(SublayerConnection(size, dropout), 3)
 
    def forward(self, x, memory, src_mask, tgt_mask):
        "Follow Figure 1 (right) for connections."
        m = memory
        x = self.sublayer[0](x, lambda x: self.self_attn(x, x, x, tgt_mask))
        x = self.sublayer[1](x, lambda x: self.src_attn(x, m, m, src_mask))
        return self.sublayer[2](x, self.feed_forward)

In [24]:
def subsequent_mask(size):
    "Mask out subsequent positions."
    attn_shape = (1, size, size)
    subsequent_mask = np.triu(np.ones(attn_shape), k=1).astype('uint8')
    return torch.from_numpy(subsequent_mask) == 0

In [25]:
def attention(query, key, value, mask=None, dropout=None):
    "Compute 'Scaled Dot Product Attention'"
    d_k = query.size(-1)
    scores = torch.matmul(query, key.transpose(-2, -1)) \
             / math.sqrt(d_k)
    if mask is not None:
        scores = scores.masked_fill(mask == 0, -1e9)
    p_attn = F.softmax(scores, dim = -1)
    if dropout is not None:
        p_attn = dropout(p_attn)
    return torch.matmul(p_attn, value), p_attn

In [26]:
class MultiHeadedAttention(nn.Module):
    def __init__(self, h, d_model, dropout=0.1):
        "Take in model size and number of heads."
        super(MultiHeadedAttention, self).__init__()
        assert d_model % h == 0
        # We assume d_v always equals d_k
        self.d_k = d_model // h
        self.h = h
        self.linears = clones(nn.Linear(d_model, d_model), 4)
        self.attn = None
        self.dropout = nn.Dropout(p=dropout)
        
    def forward(self, query, key, value, mask=None):
        "Implements Figure 2"
        if mask is not None:
            # Same mask applied to all h heads.
            mask = mask.unsqueeze(1)
        nbatches = query.size(0)
        
        # 1) Do all the linear projections in batch from d_model => h x d_k 
        query, key, value = \
            [l(x).view(nbatches, -1, self.h, self.d_k).transpose(1, 2)
             for l, x in zip(self.linears, (query, key, value))]
        
        # 2) Apply attention on all the projected vectors in batch. 
        x, self.attn = attention(query, key, value, mask=mask, 
                                 dropout=self.dropout)
        
        # 3) "Concat" using a view and apply a final linear. 
        x = x.transpose(1, 2).contiguous() \
             .view(nbatches, -1, self.h * self.d_k)
        return self.linears[-1](x)

In [27]:
class PositionwiseFeedForward(nn.Module):
    "Implements FFN equation."
    def __init__(self, d_model, d_ff, dropout=0.1):
        super(PositionwiseFeedForward, self).__init__()
        self.w_1 = nn.Linear(d_model, d_ff)
        self.w_2 = nn.Linear(d_ff, d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        return self.w_2(self.dropout(F.relu(self.w_1(x))))

In [28]:
class Embeddings(nn.Module):
    def __init__(self, d_model, vocab):
        super(Embeddings, self).__init__()
        self.lut = nn.Embedding(vocab, d_model)
        self.d_model = d_model

    def forward(self, x):
        return self.lut(x) * math.sqrt(self.d_model)

In [29]:
class PositionalEncoding(nn.Module):
    "Implement the PE function."
    def __init__(self, d_model, dropout, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)
        
        # Compute the positional encodings once in log space.
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) *
                             -(math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)
        
    def forward(self, x):
        x = x + Variable(self.pe[:, :x.size(1)], 
                         requires_grad=False)
        return self.dropout(x)

In [30]:
def make_model(src_vocab, tgt_vocab, N=6, 
               d_model=512, d_ff=2048, h=8, dropout=0.1):
    "Helper: Construct a model from hyperparameters."
    c = copy.deepcopy
    attn = MultiHeadedAttention(h, d_model)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)
    model = EncoderDecoder(
        Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), N),
        Decoder(DecoderLayer(d_model, c(attn), c(attn), 
                             c(ff), dropout), N),
        nn.Sequential(Embeddings(d_model, src_vocab), c(position)),
        nn.Sequential(Embeddings(d_model, tgt_vocab), c(position)),
        Generator(d_model, tgt_vocab))
    
    # This was important from their code. 
    # Initialize parameters with Glorot / fan_avg.
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform(p)
    return model

# Model 2

In [106]:
class EncoderDecoder(nn.Module):
    
    def __init__(self,encoder,decoder,source_embed,target_embed,generator):
        super().__init__()
        
        self.encoder=encoder
        self.decoder=decoder
        
        self.source_embed=source_embed
        self.target_embed=target_embed
        
        self.generator=generator # Linear + Log_softmax
        
    def forward(self,source,target,source_mask,target_mask):
        return self.decode(self.encode(source,source_mask),source_mask,target,target_mask)
    
    def encode(self,source,source_mask):
        return self.encoder(self.source_embed(source),source_mask)
    
    def decode(self,memory, source_mask,target,target_mask):
        return self.decoder(self.target_embed(target),memory,source_mask,target_mask)
    

In [107]:
class Generator(nn.Module):
    
    def __init__(self,d_model,vocab_size):
        super().__init__()
        self.projection=nn.Linear(d_model,vocab_size)
        
    def forward(self,decoder_output):
        return F.log_softmax(self.projection(decoder_output),dim=-1)
    

In [108]:
def clones(module,N):
    return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])

In [109]:
class Encoder(nn.Module):
    
    def __init__(self,layer,N):
        super().__init__()
        
        self.layers=clones(layer,N)
        self.norm=LayerNorm(layer.size)
    
    def forward(self,x,mask):
        
        for layer in self.layers:
            x=layer(x,mask)
        
        return self.norm(x)

In [110]:
class EncoderLayer(nn.Module):
    def __init__(self,size,self_attn,feed_forward,dropout):
        super().__init__()
        
        self.attn=self_attn
        self.feed_forward=feed_forward
        self.sublayer=clones(SublayerConnection(size,dropout),2)
        self.size=size
        
    def forward(self,x,mask):
        
        x=self.sublayer[0](x,lambda x: self.attn(x,x,x,mask))
        return self.sublayer[1](x,self.feed_forward)
        

In [111]:
class LayerNorm(nn.Module):
    
    def __init__(self,features,eps=1e-6):
        super().__init__()
        self.a_2=nn.Parameter(torch.ones(features))
        self.b_2=nn.Parameter(torch.zeros(features))
        self.eps=eps
        
    def forward(self,x):
        mean=x.mean(-1,keepdim=True)
        std=x.std(-1,keepdim=True)
        return self.a_2*(x-mean)/(x+std)+self.b_2

In [112]:
class SublayerConnection(nn.Module):
    
    def __init__(self,size,dropout):
        super().__init__()
        
        self.dropout=nn.Dropout(dropout)
        self.norm=LayerNorm(size)
        
    def forward(self,x,sublayer):
        return x+self.dropout(sublayer(self.norm(x)))
    

In [113]:
class Decoder(nn.Module):
    
    def __init__(self,layer,N):
        super().__init__()
        
        self.layers=clones(layer,N)
        self.norm=LayerNorm(layer.size)
    
    def forward(self,x,memory,curr_mask,tgt_mask):
        
        for layer in self.layers:
            x=layer(x,memory,curr_mask,tgt_mask)
            
        return self.norm(x)
    

In [114]:
class DecoderLayer(nn.Module):
    
    def __init__(self,size,self_attn,src_attn,feed_forward,dropout):
        super().__init__()
        
        self.size=size
        self.self_attn=self_attn
        self.src_attn=src_attn
        self.feed_forward=feed_forward
        
        self.sublayer=clones(SublayerConnection(size,dropout),3)
        
    def forward(self,x,memory,src_mask,tgt_mask):
        
        m=memory
        x=self.sublayer[0](x,lambda x:self.self_attn(x,x,x,tgt_mask))
        x=self.sublayer[1](x,lambda x: self.src_attn(x,m,m,src_mask))
        return self.sublayer[2](x,self.feed_forward)
        

In [115]:
class MultiHeadedAttention(nn.Module):
    
    def __init__(self,h,d_model,dropout=0.1):
        super().__init__()
        
        assert d_model%h==0
        
        self.d_k=d_model//h
        self.h=h
        self.linears=clones(nn.Linear(d_model,d_model),4)
        self.attn=None
        self.dropout=nn.Dropout(dropout)
        
    def forward(self,query,key,values,mask=None):
        
        if mask is not None:
            mask=mask.unsqueeze(1)
            
        nbatches=query.size(0)
        
        query,key,values=[l(x).view(nbatches,-1,self.h,self.d_k).transpose(1,2) for l, x in zip(self.linears,(query,key,values))]
        
        x,self.attn=attention(query,key,values,mask=mask,dropout=self.dropout)
        
        x=x.transpose(1,2).contiguous().view(nbatches,-1,self.h*self.d_k)
        
        return self.linears[-1](x)
        

In [116]:
def attention(query,key,value,mask=None,dropout=None):
    
    d_k=query.size(-1)

    scores=torch.matmul(query,key.transpose(-2,-1))/math.sqrt(d_k)
    
    if mask is not None:
        scores=scores.masked_fill(mask==0,-1e9)
        
    p_attn=F.softmax(scores,dim=-1)
    
    if dropout is not None:
        p_attn=dropout(p_attn)
        
    return torch.matmul(p_attn,value),p_attn
    

In [117]:
class PositionwiseFeedForward(nn.Module):
    
    def __init__(self,d_model,d_ff,dropout=0.1):
        super().__init__()
        
        self.w_1=nn.Linear(d_model,d_ff)
        self.w_2=nn.Linear(d_ff,d_model)
        self.dropout=nn.Dropout(dropout)
        
    def forward(self,x):
        return self.w_2(self.dropout(F.relu(self.w_1(x))))
    

In [118]:
class Embeddings(nn.Module):
    
    def __init__(self,d_model,vocab):
        super().__init__()
        
        self.embed=nn.Embedding(vocab,d_model)
        self.d_model=d_model
    
    def forward(self,x):
        return self.embed(x)*math.sqrt(self.d_model)


In [119]:
class PositionalEncoding(nn.Module):
    
    def __init__(self,d_model,dropout,max_len=5000):
        super().__init__()
        
        self.dropout=nn.Dropout(dropout)
        pe=torch.zeros(max_len,d_model,dtype=torch.float)
        position=torch.arange(0.,max_len).unsqueeze(1)
        div_term=torch.exp(torch.arange(0.,d_model,2)*-(math.log(10000.0)/d_model))
        
        pe[:,0::2]=torch.sin(position*div_term)
        pe[:,1::2]=torch.cos(position*div_term)
        
        pe=pe.unsqueeze(0)
        self.register_buffer('pe',pe)
        
    def forward(self,x):
        
        x=x+Variable(self.pe[:,:x.size(1)],requires_grad=False)
        return self.dropout(x)
        

In [120]:
def make_model2(src_vocab,tgt_vocab,N=6,d_model=512,d_ff=2048,h=8,dropout=0.1):
    
    c=copy.deepcopy
    attn=MultiHeadedAttention(h,d_model)
    ff=PositionwiseFeedForward(d_model,d_ff,dropout)
    position=PositionalEncoding(d_model,dropout)
    model=EncoderDecoder(Encoder(EncoderLayer(d_model,c(attn),c(ff),dropout),N),
                        Decoder(DecoderLayer(d_model,c(attn),c(attn),c(ff),dropout),N),
                        nn.Sequential(Embeddings(d_model,src_vocab),c(position)),
                        nn.Sequential(Embeddings(d_model,tgt_vocab),c(position)),
                        Generator(d_model,tgt_vocab))
    
    for p in model.parameters():
        if p.dim()>1:
            nn.init.xavier_uniform_(p)
    return model

In [121]:
sample_model=make_model(voc.num_words,voc.num_words,1,512,2048,8,0.1)
# print(sample_model)



In [35]:
#Sample Run
source=torch.ones(5,12,dtype=torch.long)
target=torch.ones(5,12,dtype=torch.long)
source_mask=None
target_mask=torch.ones(5,12,12,dtype=torch.long)
out=sample_model(source,target,source_mask,target_mask)
print("-"*80)
print("Output size: "+str(out.shape))
print("-"*80)

--------------------------------------------------------------------------------
Output size: torch.Size([5, 12, 512])
--------------------------------------------------------------------------------


In [45]:
"""
triu function generates a copy of matrix with elemens below kth diagonal zeroed.
The main diagonal is zeroeth diagonal above is first(k=1) and so on.

Eg:
A=[[1,2,3],[4,5,6],[7,8,9]]
for above matrix:
triu(A,k=1)
will give [[0,2,3],[0,0,6],[0,0,0]]
"""

def subsequent_mask(size):
    attn_shape=(1,size,size)
    mask=np.triu(np.ones(attn_shape),k=1).astype('uint8')
    
    return torch.from_numpy(mask)==0

# Training

In [192]:
def data_generation(pairs,batch_size,n_batches):
    
    sample_batches=[batch2TrainData(voc,[random.choice(pairs) for _ in range(batch_size)]) for _ in range(n_batches)]
    batches=[]
    
    for i in range(n_batches):
        batches.append(Batch(sample_batches[i][1],sample_batches[i][-1]))
#     batches=[]
#     for i in range(n_batches):
#         data = torch.from_numpy(np.random.randint(1, 11, size=(batch_size, 10)))
#         data[:,0]=1
        
#         batches.append(Batch(data,data))
    
    return batches

In [193]:
# class Batch:
    
#     def __init__(self,sample_batch,pad):
        
#         self.src=sample_batch[1]
#         self.src_mask=self.make_src_mask(self.src,pad)
#         self.trg=sample_batch[-1][:,:-1]
#         self.trg_mask=self.make_trg_mask(self.trg,pad)
#         self.trg_y=sample_batch[-1][:,1:]
#         self.ntokens=(self.trg_y!=pad).data.sum()
        
#     @staticmethod
#     def make_src_mask(src,pad):
#         return (src!=pad).unsqueeze(-2)
#     @staticmethod    
#     def make_trg_mask(trg,pad):
#         trg_mask=(trg!=pad).unsqueeze(-2)
# #         trg_mask=trg_mask&Variable(subsequent_mask(trg.size(-1)).type_as(trg_mask.data))
#         return trg_mask
class Batch:
    "Object for holding a batch of data with mask during training."
    def __init__(self, src, trg=None, pad=0):
        src=torch.tensor(src).to(torch.int64)
        trg=torch.tensor(trg).to(torch.int64)
        self.src = src
        self.src_mask = (src != pad).unsqueeze(-2)
        if trg is not None:
            self.trg = trg[:, :-1]
            self.trg_y = trg[:, 1:]
            self.trg_mask = \
                self.make_std_mask(self.trg, pad)
            self.ntokens = (self.trg_y != pad).data.sum()
    
    @staticmethod
    def make_std_mask(tgt, pad):
        "Create a mask to hide padding and future words."
        tgt_mask = (tgt != pad).unsqueeze(-2)
        tgt_mask = tgt_mask & Variable(
            subsequent_mask(tgt.size(-1)).type_as(tgt_mask.data))
        return tgt_mask        

In [194]:
def run_epoch(data,model,loss_compute):
    
    start_time=time.time()
    total_tokens=0
    total_loss=0
    tokens=0
    
    out=model(data.src,data.trg,data.src_mask,data.trg_mask)
#     print("Model output: "+str(out.size()))
    loss=loss_compute(out,data.trg_y,data.ntokens)
    
    return loss
  

In [195]:
def customLossFunction(outputs,target):
    batch_size=outputs.size()[0]
    numberOfWords=outputs.size()[1]
    outputs=F.softmax(outputs,dim=-1)
    loss=0
    normalisingVal=0
#     print(outputs)
#     print(target)
    for i in range(batch_size):
        for j in range(numberOfWords):
            trg=target[i][j]
            if trg!=0:
                
                currLoss=-(outputs[i][j][trg]+5)
                loss+=currLoss
                normalisingVal+=1
    return loss/normalisingVal

In [196]:
# class LabelSmoothing(nn.Module):
    
#     def __init__(self):
#         super().__init__()
#         self.criteria=customLossFunction()
#     def forward(self,x,target):
#         return self.criteria(x,target)
class LabelSmoothing(nn.Module):
    "Implement label smoothing."
    def __init__(self, size, padding_idx, smoothing=0.0):
        super(LabelSmoothing, self).__init__()
        self.criterion = nn.KLDivLoss(size_average=False)
        self.padding_idx = padding_idx
        self.confidence = 1.0 - smoothing
        self.smoothing = smoothing
        self.size = size
        self.true_dist = None
        
    def forward(self, x, target):
#         print("Before assertion: "+str(x.size())+str(self.size))
        assert x.size(1) == self.size
        true_dist = x.data.clone()
        true_dist.fill_(self.smoothing / (self.size - 2))
        true_dist.scatter_(1, target.data.unsqueeze(1), self.confidence)
        true_dist[:, self.padding_idx] = 0
        mask = torch.nonzero(target.data == self.padding_idx)
        if mask.dim() > 0:
            true_dist.index_fill_(0, mask.squeeze(), 0.0)
        self.true_dist = true_dist
        return self.criterion(x, Variable(true_dist, requires_grad=False))

In [197]:
# class LossCompute:
    
#     def __init__(self,model,opt):
        
#         self.opt=opt
#         self.model=model
    
#     def __call__(self,x,y,norm):
        
#         x=self.model.generator(x)
#         loss=customLossFunction(x,y)
        
    
       

#         loss.backward()
        
# #         _=nn.utils.clip_grad_norm_(model.parameters(),50.0)
        
#         plot_grad_flow(self.model.named_parameters())
        
#         self.opt.step()
#         self.opt.optimizer.zero_grad()
        
#         return loss.item()

class SimpleLossCompute:
    "A simple loss compute and train function."
    def __init__(self, generator, criterion, opt=None):
        self.generator = generator
        self.criterion = criterion
        self.opt = opt
        
    def __call__(self, x, y, norm):
        x = self.generator(x)
#         print(str(x.size())+" "+str(y.size()))
        loss = self.criterion(x.contiguous().view(-1, x.size(-1)), 
                              y.contiguous().view(-1)) / norm
        loss.backward()
        if self.opt is not None:
            self.opt.step()
            self.opt.optimizer.zero_grad()
        return loss.item()* norm

        
        

In [198]:
class NoamOpt:
    "Optim wrapper that implements rate."
    def __init__(self, model_size, factor, warmup, optimizer):
        self.optimizer = optimizer
        self._step = 0
        self.warmup = warmup
        self.factor = factor
        self.model_size = model_size
        self._rate = 0
        
    def step(self):
        "Update parameters and rate"
        self._step += 1
        rate = self.rate()
        for p in self.optimizer.param_groups:
            p['lr'] = rate
        self._rate = rate
        self.optimizer.step()
        
    def rate(self, step = None):
        "Implement `lrate` above"
        if step is None:
            step = self._step
        return self.factor * \
            (self.model_size ** (-0.5) *
            min(step ** (-0.5), step * self.warmup ** (-1.5)))
        
def get_std_opt(model):
    return NoamOpt(model.src_embed[0].d_model, 2, 4000,
            torch.optim.Adam(model.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9))

In [235]:
batches=data_generation(pairs,30,200)



In [236]:
print("Initialising and creating models....")
V=voc.num_words
t1=time.time()
# criterion=LabelSmoothing()
criterion = LabelSmoothing(size=V, padding_idx=0, smoothing=0.0)

model=make_model(V,V)
# model_opt=torch.optim.Adam(model.parameters(),lr=0.0001,betas=(0.9,0.988),eps=1e-9)
model_opt = NoamOpt(model.src_embed[0].d_model, 1, 400,
        torch.optim.Adam(model.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9))
print("="*100)
print("Creating Models took: "+str(time.time()-t1))



model.train()
for epoch in range(1000):
    
    current_batch=batches[epoch%10]
    loss_val=run_epoch(current_batch,model,SimpleLossCompute(model.generator, criterion, model_opt))
    print("Epoch: "+str(epoch)+" Loss Value: "+str(loss_val))
    
    

Initialising and creating models....




Creating Models took: 1.4429008960723877
Epoch: 0 Loss Value: tensor(1661.3982)
Epoch: 1 Loss Value: tensor(1767.8641)
Epoch: 2 Loss Value: tensor(1619.6750)
Epoch: 3 Loss Value: tensor(1679.2676)
Epoch: 4 Loss Value: tensor(1594.7104)
Epoch: 5 Loss Value: tensor(1517.5872)
Epoch: 6 Loss Value: tensor(1622.8674)
Epoch: 7 Loss Value: tensor(1686.8176)
Epoch: 8 Loss Value: tensor(1544.1543)
Epoch: 9 Loss Value: tensor(1411.3167)
Epoch: 10 Loss Value: tensor(1406.6030)
Epoch: 11 Loss Value: tensor(1514.2871)
Epoch: 12 Loss Value: tensor(1375.1333)
Epoch: 13 Loss Value: tensor(1434.7544)
Epoch: 14 Loss Value: tensor(1374.3829)
Epoch: 15 Loss Value: tensor(1323.0715)
Epoch: 16 Loss Value: tensor(1444.1528)
Epoch: 17 Loss Value: tensor(1519.0183)
Epoch: 18 Loss Value: tensor(1381.5721)
Epoch: 19 Loss Value: tensor(1268.0056)
Epoch: 20 Loss Value: tensor(1264.0984)
Epoch: 21 Loss Value: tensor(1373.3706)
Epoch: 22 Loss Value: tensor(1240.5237)
Epoch: 23 Loss Value: tensor(1296.3038)
Epoch: 24

Epoch: 204 Loss Value: tensor(264.9053)
Epoch: 205 Loss Value: tensor(209.1859)
Epoch: 206 Loss Value: tensor(271.9621)
Epoch: 207 Loss Value: tensor(310.4354)
Epoch: 208 Loss Value: tensor(241.3702)
Epoch: 209 Loss Value: tensor(237.3477)
Epoch: 210 Loss Value: tensor(239.2936)
Epoch: 211 Loss Value: tensor(233.2827)
Epoch: 212 Loss Value: tensor(217.3691)
Epoch: 213 Loss Value: tensor(218.3309)
Epoch: 214 Loss Value: tensor(237.1003)
Epoch: 215 Loss Value: tensor(203.4698)
Epoch: 216 Loss Value: tensor(244.1450)
Epoch: 217 Loss Value: tensor(262.4188)
Epoch: 218 Loss Value: tensor(226.4227)
Epoch: 219 Loss Value: tensor(192.7823)
Epoch: 220 Loss Value: tensor(190.2818)
Epoch: 221 Loss Value: tensor(195.8247)
Epoch: 222 Loss Value: tensor(185.8961)
Epoch: 223 Loss Value: tensor(221.7044)
Epoch: 224 Loss Value: tensor(189.2341)
Epoch: 225 Loss Value: tensor(161.3306)
Epoch: 226 Loss Value: tensor(223.5115)
Epoch: 227 Loss Value: tensor(227.8885)
Epoch: 228 Loss Value: tensor(195.8510)


Epoch: 411 Loss Value: tensor(84.5990)
Epoch: 412 Loss Value: tensor(73.7816)
Epoch: 413 Loss Value: tensor(94.1788)
Epoch: 414 Loss Value: tensor(80.8856)
Epoch: 415 Loss Value: tensor(78.8191)
Epoch: 416 Loss Value: tensor(65.9505)
Epoch: 417 Loss Value: tensor(111.5567)
Epoch: 418 Loss Value: tensor(71.3852)
Epoch: 419 Loss Value: tensor(61.9815)
Epoch: 420 Loss Value: tensor(83.6383)
Epoch: 421 Loss Value: tensor(79.2348)
Epoch: 422 Loss Value: tensor(70.3225)
Epoch: 423 Loss Value: tensor(67.2563)
Epoch: 424 Loss Value: tensor(64.4877)
Epoch: 425 Loss Value: tensor(87.6884)
Epoch: 426 Loss Value: tensor(83.6164)
Epoch: 427 Loss Value: tensor(89.0980)
Epoch: 428 Loss Value: tensor(85.0314)
Epoch: 429 Loss Value: tensor(66.7686)
Epoch: 430 Loss Value: tensor(78.2035)
Epoch: 431 Loss Value: tensor(70.3387)
Epoch: 432 Loss Value: tensor(55.8073)
Epoch: 433 Loss Value: tensor(57.8971)
Epoch: 434 Loss Value: tensor(75.0411)
Epoch: 435 Loss Value: tensor(78.7423)
Epoch: 436 Loss Value: t

Epoch: 622 Loss Value: tensor(22.5853)
Epoch: 623 Loss Value: tensor(39.1139)
Epoch: 624 Loss Value: tensor(41.7381)
Epoch: 625 Loss Value: tensor(37.6131)
Epoch: 626 Loss Value: tensor(31.1770)
Epoch: 627 Loss Value: tensor(39.2105)
Epoch: 628 Loss Value: tensor(40.2496)
Epoch: 629 Loss Value: tensor(30.8867)
Epoch: 630 Loss Value: tensor(30.9811)
Epoch: 631 Loss Value: tensor(18.7754)
Epoch: 632 Loss Value: tensor(23.6770)
Epoch: 633 Loss Value: tensor(34.6227)
Epoch: 634 Loss Value: tensor(34.3228)
Epoch: 635 Loss Value: tensor(44.3680)
Epoch: 636 Loss Value: tensor(50.8872)
Epoch: 637 Loss Value: tensor(55.8322)
Epoch: 638 Loss Value: tensor(34.4418)
Epoch: 639 Loss Value: tensor(28.5176)
Epoch: 640 Loss Value: tensor(36.8985)
Epoch: 641 Loss Value: tensor(24.3582)
Epoch: 642 Loss Value: tensor(29.7211)
Epoch: 643 Loss Value: tensor(33.0502)
Epoch: 644 Loss Value: tensor(40.9046)
Epoch: 645 Loss Value: tensor(38.7519)
Epoch: 646 Loss Value: tensor(56.6139)
Epoch: 647 Loss Value: te

Epoch: 833 Loss Value: tensor(28.4957)
Epoch: 834 Loss Value: tensor(23.1933)
Epoch: 835 Loss Value: tensor(27.8827)
Epoch: 836 Loss Value: tensor(31.8343)
Epoch: 837 Loss Value: tensor(45.5584)
Epoch: 838 Loss Value: tensor(24.6433)
Epoch: 839 Loss Value: tensor(40.1086)
Epoch: 840 Loss Value: tensor(14.0517)
Epoch: 841 Loss Value: tensor(45.7869)
Epoch: 842 Loss Value: tensor(17.1411)
Epoch: 843 Loss Value: tensor(24.7697)
Epoch: 844 Loss Value: tensor(35.7185)
Epoch: 845 Loss Value: tensor(21.0150)
Epoch: 846 Loss Value: tensor(27.4565)
Epoch: 847 Loss Value: tensor(37.7810)
Epoch: 848 Loss Value: tensor(20.9623)
Epoch: 849 Loss Value: tensor(32.7831)
Epoch: 850 Loss Value: tensor(27.0957)
Epoch: 851 Loss Value: tensor(31.1149)
Epoch: 852 Loss Value: tensor(14.8984)
Epoch: 853 Loss Value: tensor(15.5938)
Epoch: 854 Loss Value: tensor(16.9590)
Epoch: 855 Loss Value: tensor(19.6497)
Epoch: 856 Loss Value: tensor(24.7219)
Epoch: 857 Loss Value: tensor(29.5932)
Epoch: 858 Loss Value: te

In [237]:
def greedy_decode(model, src, src_mask, max_len, start_symbol):
    memory = model.encode(src, src_mask)
    ys = torch.ones(1, 1).fill_(start_symbol).type_as(src.data)
    for i in range(max_len-1):
        out = model.decode(memory, src_mask, 
                           Variable(ys), 
                           Variable(subsequent_mask(ys.size(1))
                                    .type_as(src.data)))
        prob = model.generator(out[:, -1])
        _, next_word = torch.max(prob, dim = 1)
        next_word = next_word.data[0]
        ys = torch.cat([ys, 
                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=1)
    return ys

In [238]:
model.eval()

EncoderDecoder(
  (encoder): Encoder(
    (layers): ModuleList(
      (0): EncoderLayer(
        (self_attn): MultiHeadedAttention(
          (linears): ModuleList(
            (0): Linear(in_features=512, out_features=512, bias=True)
            (1): Linear(in_features=512, out_features=512, bias=True)
            (2): Linear(in_features=512, out_features=512, bias=True)
            (3): Linear(in_features=512, out_features=512, bias=True)
          )
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=512, out_features=2048, bias=True)
          (w_2): Linear(in_features=2048, out_features=512, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (sublayer): ModuleList(
          (0): SublayerConnection(
            (norm): LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): SublayerConnection(
            (norm): LayerNorm()

In [None]:
for batch in batches:
    for i in range(5):
        output=greedy_decode(model,batch.src[i].view(-1,12),batch.src_mask[i].view(1,-1,12),10,1)
        src=batch.src[i].view(-1)
        trg=batch.trg[i].view(-1)
        pred=output.view(-1)
        print(src.size())
        for id in src:
            print(id)
        src_sentence=[voc.index2word[id.item()] for id in src]
        trg_sentence=[voc.index2word[id.item()] for id in trg]
        pred_sentence=[voc.index2word[id.item()] for id in pred]
        print(src_sentence)
        print(trg_sentence)
        print(pred_sentence)
        print("-"*80)
#         print("-"*80)
#         print(str(output)+" "+str(batch.src[i])+" "+str(batch.trg[i]))
    

torch.Size([12])
tensor(1)
tensor(60)
tensor(158)
tensor(12)
tensor(845)
tensor(199)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'go', 'get', 'a', 'bag', 'man', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'and', 'here', '.', 'here', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'and', 'here', '.', 'here', '.', 'EOS', '.', 'EOS', '.']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(25)
tensor(387)
tensor(1229)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'i', 'am', 'joe', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'you', '!', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'you', '!', 'EOS', 'to', 'kill', 'you', '!', 'EOS', '!']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(77)
tensor(37)
tensor(177)
tensor(2445)
tensor(6)
tensor(2)
tensor(0)
tens

torch.Size([12])
tensor(1)
tensor(91)
tensor(66)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'daddy', '!', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'yes', 'amy', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'yes', 'amy', '?', 'EOS', '!', 'EOS', '?', 'EOS', '!']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(131)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'because', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'they', 'got', 'to', 'have', 'something', 'that', 'we', 'want', '.', 'EOS']
['SOS', 'they', 'got', 'to', 'have', 'something', 'that', 'we', 'want', '.']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(242)
tensor(96)
tensor(9)
tensor(2213)
tensor(4)
tenso

torch.Size([12])
tensor(1)
tensor(50)
tensor(47)
tensor(7)
tensor(260)
tensor(6)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'what', 'do', 'you', 'mean', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'i', 'think', 'you', 'know', 'what', 'i', 'mean', '.', 'EOS', 'PAD']
['SOS', 'i', 'think', 'you', 'know', 'what', 'i', 'mean', '.', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(7)
tensor(247)
tensor(117)
tensor(6)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'you', 'didn', 't', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'and', 'you', 'know', 'what', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'and', 'you', 'know', 'what', '?', 'EOS', 'for', '?', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(1571)
tensor(2868)
tensor(4)
tensor(36)
tensor(37)
tensor(53)


torch.Size([12])
tensor(1)
tensor(38)
tensor(266)
tensor(1172)
tensor(7802)
tensor(4)
tensor(2213)
tensor(70)
tensor(19)
tensor(4)
tensor(2)
tensor(0)
['SOS', 'all', 'right', 'miss', 'juspeczyk', '.', 'pack', 'your', 'things', '.', 'EOS', 'PAD']
['SOS', 'am', 'i', 'free', 'to', 'go', '?', 'EOS', 'PAD', 'PAD', 'PAD']
['SOS', 'that', 's', 'not', 'possible', '.', 'EOS', '.', 'EOS', '.']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(25)
tensor(669)
tensor(75)
tensor(7)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'i', 'worry', 'about', 'you', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'me', '?', '?', 'c', 'mon', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'it', 's', 'me', '.', 'it', '.', 'EOS', '.', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(197)
tensor(117)
tensor(364)
tensor(3348)
tensor(4)
tensor(2

torch.Size([12])
tensor(1)
tensor(77)
tensor(37)
tensor(187)
tensor(6)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'who', 's', 'dead', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'nobody', '.', 'loretta', 'is', 'getting', 'married', '.', 'EOS', 'PAD', 'PAD']
['SOS', 'al', '!', 'EOS', '.', 'EOS', '.', 'EOS', '.', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(869)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'ok', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'you', 'just', 'move', 'in', 'here', '?', 'EOS', 'PAD', 'PAD', 'PAD']
['SOS', 'walker', 'and', 'williams', '.', 'EOS', '.', 'EOS', '.', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(167)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tens

torch.Size([12])
tensor(1)
tensor(147)
tensor(582)
tensor(6)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'how', 'long', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'what', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'right', 'now', 'everything', 'i', 'was', 'my', 'kid', '.', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(9)
tensor(4181)
tensor(1903)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'my', 'apologies', 'madam', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'it', 's', 'okay', 'puff', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'this', 'guy', 'on', 'your', 't', '.v', '.', 'EOS', '.']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(25)
tensor(197)
tensor(117)
tensor(26)
tensor(7)
tenso

torch.Size([12])
tensor(1)
tensor(150)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'people', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'people', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'now', '.', 'EOS', '.', 'EOS', '.', 'EOS', '.', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(36)
tensor(116)
tensor(117)
tensor(198)
tensor(1136)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'that', 'doesn', 't', 'make', 'sense', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'add', 'it', 'to', 'the', 'list', '.', 'EOS', 'PAD', 'PAD', 'PAD']
['SOS', 'a', 'sense', 'of', 'proportion', '.', 'EOS', '.', 'EOS', '.']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(34)
tensor(625)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
t

torch.Size([12])
tensor(1)
tensor(34)
tensor(4)
tensor(7)
tensor(132)
tensor(380)
tensor(1577)
tensor(159)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
['SOS', 'no', '.', 'you', 'll', 'be', 'safe', 'here', '.', 'EOS', 'PAD', 'PAD']
['SOS', 'i', 'don', 't', 'want', 'to', 'be', 'safe', '!', 'EOS', 'PAD']
['SOS', 'i', 'believe', 'it', 'you', 'he', 's', 'crazy', '.', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(25)
tensor(534)
tensor(40)
tensor(60)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'i', 'got', 'to', 'go', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'what', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'you', 'want', 'to', 'play', 'him', '?', 'EOS', '.', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(5335)
tensor(147)
tensor(92)
tensor(65)
tensor(1827)
tensor(6)
tensor(

torch.Size([12])
tensor(1)
tensor(80)
tensor(7)
tensor(18)
tensor(492)
tensor(86)
tensor(490)
tensor(6)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'would', 'you', 'like', 'some', 'more', 'coffee', '?', 'EOS', 'PAD', 'PAD', 'PAD']
['SOS', 'please', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'was', 'lying', '.', 'EOS', '.', 'EOS', '.', 'EOS', '.']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(5)
tensor(92)
tensor(7)
tensor(123)
tensor(6)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'where', 'are', 'you', 'going', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'i', 'm', 'calling', 'her', '!', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'mind', 'your', 'own', 'business', '.', 'EOS', '.', 'EOS', '.']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(34)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor

torch.Size([12])
tensor(1)
tensor(25)
tensor(94)
tensor(117)
tensor(2562)
tensor(22)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'i', 'can', 't', 'dance', 'tonight', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'it', 'will', 'pass', 'it', 'will', 'pass', 'come', '.', 'EOS', 'PAD']
['SOS', 'you', 're', 'very', 'funny', 'EOS', 'depinto', '?', 'EOS', '.']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(1710)
tensor(4)
tensor(169)
tensor(3163)
tensor(122)
tensor(253)
tensor(139)
tensor(164)
tensor(4)
tensor(2)
tensor(0)
['SOS', 'dinner', '.', 'with', 'stan', 'and', 'his', 'new', 'girl', '.', 'EOS', 'PAD']
['SOS', 'when', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'and', 'here', '.', 'five', '.', 'what', '?', 'EOS', '?']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(33)
tensor(68)
tensor(7)
tensor(329)


torch.Size([12])
tensor(1)
tensor(96)
tensor(112)
tensor(12)
tensor(786)
tensor(4)
tensor(157)
tensor(4)
tensor(4)
tensor(4)
tensor(2)
tensor(0)
['SOS', 'in', 'just', 'a', 'second', '.', 'two', '.', '.', '.', 'EOS', 'PAD']
['SOS', 'i', 'don', 't', 'believe', 'this', '.', 'EOS', 'PAD', 'PAD', 'PAD']
['SOS', 'oh', '.', '.', '.', 'EOS', '.', 'EOS', '.', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(115)
tensor(76)
tensor(935)
tensor(6)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'is', 'it', 'over', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'no', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'i', 'don', 't', 'be', 'getting', 'racist', 'with', 'me', 'man']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(1418)
tensor(53)
tensor(609)
tensor(4)
tensor(4)
tensor(4)
tensor(2)
ten

torch.Size([12])
tensor(1)
tensor(12)
tensor(2143)
tensor(7)
tensor(128)
tensor(96)
tensor(53)
tensor(2778)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
['SOS', 'a', 'dress', 'you', 'wear', 'in', 'the', 'evening', '.', 'EOS', 'PAD', 'PAD']
['SOS', 'what', 'do', 'you', 'wear', 'in', 'the', 'morning', '?', 'EOS', 'PAD']
['SOS', 'this', 'morning', '.', 'EOS', '.', 'EOS', 'this', '.', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(242)
tensor(7)
tensor(94)
tensor(8)
tensor(9)
tensor(2870)
tensor(4)
tensor(4)
tensor(4)
tensor(2)
tensor(0)
['SOS', 'look', 'you', 'can', 'have', 'my', 'wallet', '.', '.', '.', 'EOS', 'PAD']
['SOS', 'it', 's', 'not', 'nearly', 'enough', 'punk', '.', 'EOS', 'PAD', 'PAD']
['SOS', 'you', 'want', 'to', '.', 'EOS', '.', 'EOS', '.', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(101)
tensor(1730)
tensor(159)
tensor(96)
tensor

torch.Size([12])
tensor(1)
tensor(562)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'jesus', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'what', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'for', 'old', 't', '.v', '.', 'EOS', 'for', '.', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(591)
tensor(56)
tensor(6086)
tensor(575)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'smell', 'of', 'booze', 'much', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'nice', 'much', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'then', 'why', '?', 'EOS', '?', 'EOS', '?', 'EOS', '!']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(3289)
tensor(3290)
tensor(61)
tensor(37)
tensor(159)
tensor(470)


torch.Size([12])
tensor(1)
tensor(22)
tensor(4)
tensor(59)
tensor(37)
tensor(158)
tensor(843)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'tonight', '.', 'let', 's', 'get', 'together', '.', 'EOS', 'PAD', 'PAD', 'PAD']
['SOS', 'okay', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'you', 're', 'getting', 'off', 'to', 'ask', 'me', 'man', '.']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(25)
tensor(505)
tensor(437)
tensor(7)
tensor(2708)
tensor(37)
tensor(1597)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
['SOS', 'i', 'gotta', 'show', 'you', 'jason', 's', 'grave', '.', 'EOS', 'PAD', 'PAD']
['SOS', 'i', 've', 'seen', 'it', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'okay', 'i', 'like', 'it', '.', 'EOS', '.', 'EOS', '.']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(76)
tensor(37)
tensor(12)
tensor(283)
tensor(25)
tens

torch.Size([12])
tensor(1)
tensor(3164)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'kay', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'yes', 'michael', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'whattaya', 'guess', 'you', 'have', 'to', 'go', 'on', 'faith', '.']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(101)
tensor(68)
tensor(6)
tensor(219)
tensor(6)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'he', 'did', '?', 'when', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'while', 'you', 'were', 'in', 'the', 'shower', '.', 'EOS', 'PAD', 'PAD']
['SOS', 'this', 'guy', 'on', 'the', 'johnny', 'carson', 'show', '.', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(53)
tensor(849)
tensor(591)
tensor(115)
tenso

torch.Size([12])
tensor(1)
tensor(464)
tensor(6)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'alright', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'absolutely', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'great', 'sir', '.', 'great', '.', 'great', '.', 'EOS', '.']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(80)
tensor(7)
tensor(18)
tensor(40)
tensor(605)
tensor(276)
tensor(1229)
tensor(6)
tensor(2)
tensor(0)
tensor(0)
['SOS', 'would', 'you', 'like', 'to', 'sit', 'down', 'joe', '?', 'EOS', 'PAD', 'PAD']
['SOS', 'yes', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'i', 'told', 'you', 'he', 's', 'crazy', '.', 'EOS', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(278)
tensor(50)
tensor(6)
tensor(2)
tensor(0)
tens

torch.Size([12])
tensor(1)
tensor(319)
tensor(37)
tensor(774)
tensor(159)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'something', 's', 'wrong', 'here', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'gee', '.', 'you', 'really', 'think', '?', 'EOS', 'PAD', 'PAD', 'PAD']
['SOS', 'all', 'night', '.', 'EOS', '.', 'EOS', '.', 'EOS', '.']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(1114)
tensor(62)
tensor(6)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'sleep', 'okay', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'yeah', '.', 'you', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'break', 'it', 'out', '.', 'EOS', '.', 'EOS', '.', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(147)
tensor(575)
tensor(6)
tensor(2)
tensor(0)
tensor(0)
t

torch.Size([12])
tensor(1)
tensor(780)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'nice', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'what', 's', 'the', 'name', 'of', 'your', 'label', '?', 'EOS', 'PAD']
['SOS', 'i', 'said', 'do', 'you', 'come', 'here', 'often', '!', '?']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(64)
tensor(50)
tensor(37)
tensor(253)
tensor(1363)
tensor(6)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'so', 'what', 's', 'his', 'reason', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'maybe', 'sidney', 'wouldn', 't', 'have', 'sex', 'with', 'him', '.', 'EOS']
['SOS', 'that', 's', 'it', '.', 'EOS', '.', 'EOS', '.', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(348)
tensor(64)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
te

torch.Size([12])
tensor(1)
tensor(45)
tensor(115)
tensor(1073)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'this', 'is', 'stupid', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'c', 'mon', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'now', 'you', 'need', 'me', '.', 'EOS', '.', 'EOS', '.']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(124)
tensor(7132)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'oh', 'clayton', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'did', 'i', 'hurt', 'you', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'walker', 'and', 'williams', '.', 'EOS', '.', 'EOS', '.', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(16)
tensor(4)
tensor(25)
tensor(239)
tensor(74)
tensor(36)
t

torch.Size([12])
tensor(1)
tensor(101)
tensor(37)
tensor(159)
tensor(4118)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'he', 's', 'here', 'sheriff', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'why', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'all', 'night', '.', 'EOS', '.', 'EOS', '.', 'EOS', '.']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(1379)
tensor(6)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'michael', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'i', 'm', 'thinking', 'about', 'it', '.', 'EOS', 'PAD', 'PAD', 'PAD']
['SOS', 'this', 'guy', 'on', 'the', 'johnny', 'carson', 'show', '.', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(88)
tensor(3256)
tensor(4)
tensor(88)
tensor(7594)
t

torch.Size([12])
tensor(1)
tensor(94)
tensor(117)
tensor(746)
tensor(53)
tensor(3312)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'can', 't', 'find', 'the', 'shotgun', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'so', 'whoever', 'did', 'this', 'now', 'has', 'our', 'shotgun', '.', 'EOS']
['SOS', 'this', 'morning', '.', 'EOS', '.', 'EOS', 'this', '.', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(101)
tensor(138)
tensor(12)
tensor(201)
tensor(565)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'he', 'has', 'a', 'little', 'dog', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'oh', 'yes', 'yes', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'i', 'don', 't', 'believe', 'you', '.', 'EOS', '.', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(50)
tensor(75)
tensor(6)
tensor(2)
tensor(0)
t

torch.Size([12])
tensor(1)
tensor(6682)
tensor(6)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'fergus', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'yeah', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'this', 'guy', 'on', 'your', 't', 'too', '.', 'EOS', '.']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(25)
tensor(200)
tensor(53)
tensor(2739)
tensor(1117)
tensor(117)
tensor(25)
tensor(6)
tensor(2)
tensor(0)
tensor(0)
['SOS', 'i', 'm', 'the', 'princess', 'aren', 't', 'i', '?', 'EOS', 'PAD', 'PAD']
['SOS', 'of', 'course', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'tired', '?', 'EOS', 'surprise', 'you', 'know', 'EOS', '.', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(77)
tensor(6)
tensor(2)
tensor(0)
tensor(0)
tenso

torch.Size([12])
tensor(1)
tensor(50)
tensor(68)
tensor(101)
tensor(41)
tensor(6)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'what', 'did', 'he', 'say', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'good', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'i', 'told', 'you', 'he', 's', 'crazy', '.', 'EOS', '.']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(883)
tensor(4)
tensor(1700)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'mr', '.', 'parker', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'hi', 'professor', '.', 'what', 's', 'up', '?', 'EOS', 'PAD', 'PAD']
['SOS', 'which', 'half', '?', 'EOS', '.', 'EOS', '?', 'EOS', '?']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(124)
tensor(101)
tensor(115)
tensor(64)
tensor(935)
tensor(177)
tenso

torch.Size([12])
tensor(1)
tensor(7515)
tensor(4)
tensor(4)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'hindenburg', '.', '.', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'hindenburg', '.', '.', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'for', 'old', 't', '.v', '.', 'EOS', 'for', '.', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(2307)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'sex', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'could', 'you', 'be', 'more', 'specific', '?', 'EOS', 'PAD', 'PAD', 'PAD']
['SOS', 'for', 'they', 'got', 'to', 'have', 'something', 'that', '?', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(25)
tensor(1330)
tensor(174)
tensor(96)
tensor(139)


torch.Size([12])
tensor(1)
tensor(5)
tensor(92)
tensor(7)
tensor(123)
tensor(6)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'where', 'are', 'you', 'going', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'i', 'm', 'going', 'home', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'mind', 'your', 'own', 'business', '.', 'EOS', '.', 'EOS', '.']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(131)
tensor(65)
tensor(1015)
tensor(83)
tensor(6)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'because', 'they', 'caught', 'me', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'you', 'failed', 'because', 'you', 'had', 'the', 'wrong', 'dream', '.', 'EOS']
['SOS', 'walker', 'and', 'williams', '.', 'EOS', '.', 'EOS', '.', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(4777)
tensor(598)
tensor(83)
tensor(

torch.Size([12])
tensor(1)
tensor(1466)
tensor(117)
tensor(1075)
tensor(170)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'haven', 't', 'called', 'her', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'jesus', 'christ', 'you', 'are', 'an', 'amateur', '.', 'EOS', 'PAD', 'PAD']
['SOS', 'you', '?', 'EOS', '.', 'EOS', '?', 'EOS', '?', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(227)
tensor(98)
tensor(510)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'time', 'for', 'lunch', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'angelo', 'it', 's', 'eleven', 'thirty', '.', 'EOS', 'PAD', 'PAD', 'PAD']
['SOS', 'for', 'what', '?', 'EOS', '?', 'EOS', 'for', 'shop', 'rocker']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(1148)
tensor(4)
tensor(2)
tensor(0)
tensor

torch.Size([12])
tensor(1)
tensor(218)
tensor(76)
tensor(611)
tensor(1556)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'take', 'it', 'easy', 'dude', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'yeah', '.', 'thanks', 'man', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'now', 'you', 'like', 'it', '.', 'EOS', '.', 'EOS', '.']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(36)
tensor(201)
tensor(154)
tensor(199)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'that', 'little', 'old', 'man', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'oh', 'that', 'one', '.', 'that', 's', 'me', 'grandfather', '.', 'EOS']
['SOS', 'oh', 'but', 'he', 'was', 'terrific', '.', 'EOS', '.', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(18)
tensor(53)
tensor(516)
tensor(36)
tensor(1536)

torch.Size([12])
tensor(1)
tensor(124)
tensor(9)
tensor(125)
tensor(4)
tensor(4)
tensor(4)
tensor(66)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'oh', 'my', 'god', '.', '.', '.', '!', 'EOS', 'PAD', 'PAD', 'PAD']
['SOS', 'what', 'happened', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'one', 'bad', 'car', 'accident', '.', '.', 'EOS', '.', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(25)
tensor(200)
tensor(483)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'i', 'm', 'sorry', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'yeah', '.', 'beat', 'how', 'about', 'you', '?', 'EOS', 'PAD', 'PAD']
['SOS', 'it', 's', 'all', 'right', '.', 'EOS', '.', 'EOS', '.']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(115)
tensor(45)
tensor(12)
tensor(306)
tensor(227)
tensor(6)
tensor(2)

torch.Size([12])
tensor(1)
tensor(274)
tensor(234)
tensor(83)
tensor(572)
tensor(66)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'fine', 'by', 'me', 'sir', '!', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'no', 'problem', 'sir', '!', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'do', 'you', 'mean', 'church', 'of', 'england', '?', 'EOS', '?']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(27)
tensor(8)
tensor(40)
tensor(60)
tensor(479)
tensor(66)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'we', 'have', 'to', 'go', 'back', '!', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'no', '!', 'i', 'got', 'to', 'find', 'him', '.', 'EOS', 'PAD']
['SOS', 'you', 'fucking', 'nuts', '?', 'EOS', '.', 'EOS', '.', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(34)
tensor(67)
tensor(112)
tensor(36)
tensor(4)
tensor(2)
t

torch.Size([12])
tensor(1)
tensor(5)
tensor(334)
tensor(101)
tensor(74)
tensor(101)
tensor(37)
tensor(123)
tensor(6)
tensor(2)
tensor(0)
tensor(0)
['SOS', 'where', 'does', 'he', 'think', 'he', 's', 'going', '?', 'EOS', 'PAD', 'PAD']
['SOS', 'when', 'you', 'gotta', 'go', 'you', 'gotta', 'go', '.', 'EOS', 'PAD']
['SOS', 'what', '?', 'what', 'are', 'we', '.', 'EOS', '?', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(1734)
tensor(7)
tensor(14)
tensor(123)
tensor(40)
tensor(259)
tensor(174)
tensor(22)
tensor(4)
tensor(2)
tensor(0)
['SOS', 'anyway', 'you', 're', 'going', 'to', 'meet', 'him', 'tonight', '.', 'EOS', 'PAD']
['SOS', 'i', 'am', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'mind', 'your', 'own', 'business', '.', 'EOS', '.', 'EOS', '.']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(42)
tensor(25)
tensor(74)
tensor(25)
tens

torch.Size([12])
tensor(1)
tensor(4)
tensor(4)
tensor(4)
tensor(67)
tensor(720)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', '.', '.', '.', 'not', 'yet', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'don', 't', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'how', 'hard', 'could', 'it', 'doing', 'for', 'him', '.', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(534)
tensor(76)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'got', 'it', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'how', 's', 'our', 'deal', 'coming', 'along', '?', 'EOS', 'PAD', 'PAD']
['SOS', 'you', 'mean', 'that', '?', 'EOS', 'often', '!', '?', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(1375)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
ten

torch.Size([12])
tensor(1)
tensor(1172)
tensor(83)
tensor(6)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'miss', 'me', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'i', '.', '.', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'it', 's', 'lynn', '.', 'EOS', '.', 'EOS', '.', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(27)
tensor(39)
tensor(12)
tensor(4947)
tensor(4)
tensor(7)
tensor(899)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
['SOS', 'we', 'had', 'a', 'bargain', '.', 'you', 'promised', '.', 'EOS', 'PAD', 'PAD']
['SOS', 'i', 'lied', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'we', 'll', 'try', 'again', '.', 'EOS', 'on', 'the', 'program']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(76)
tensor(37)
tensor(67)
tensor(248)
tensor(12)
tensor(5599)
tensor

torch.Size([12])
tensor(1)
tensor(77)
tensor(102)
tensor(76)
tensor(4466)
tensor(53)
tensor(1104)
tensor(6)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'who', 'was', 'it', 'answered', 'the', 'phone', '?', 'EOS', 'PAD', 'PAD', 'PAD']
['SOS', 'none', 'of', 'your', 'business', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'i', 'can', 't', 'we', 'do', 'it', '.', 'EOS', '!']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(51)
tensor(4)
tensor(95)
tensor(7128)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'good', '.', 'now', 'pillow', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'pillow', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'i', 'll', 'beat', 'your', 'ass', 'EOS', '!', 'EOS', '!']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(16)
tensor(3585)
tensor(4)
tensor(2)
tensor(0)
tensor

torch.Size([12])
tensor(1)
tensor(25)
tensor(132)
tensor(2743)
tensor(211)
tensor(492)
tensor(2328)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'i', 'll', 'fix', 'ya', 'some', 'eggs', '.', 'EOS', 'PAD', 'PAD', 'PAD']
['SOS', 'that', 's', 'okay', 'hon', '.', 'i', 'gotta', 'run', '.', 'EOS']
['SOS', 'tell', 'me', 'about', 'it', '.', 'EOS', '.', 'EOS', '.']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(101)
tensor(102)
tensor(965)
tensor(6)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'he', 'was', 'american', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'swiss', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'i', 'said', 'do', 'you', 'come', 'here', 'often', '!', '?']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(76)
tensor(116)
tensor(117)
tensor(600)
tensor(4)
tensor

torch.Size([12])
tensor(1)
tensor(908)
tensor(66)
tensor(95)
tensor(41)
tensor(4)
tensor(4)
tensor(4)
tensor(787)
tensor(2)
tensor(0)
tensor(0)
['SOS', 'excellent', '!', 'now', 'say', '.', '.', '.', 'hello', 'EOS', 'PAD', 'PAD']
['SOS', 'hello', '.', '.', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'i', 'asked', 'you', '!', 'EOS', '!', 'EOS', '!', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(5178)
tensor(141)
tensor(83)
tensor(75)
tensor(1623)
tensor(5350)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'francis', 'tell', 'me', 'about', 'general', 'cornwallis', '.', 'EOS', 'PAD', 'PAD', 'PAD']
['SOS', 'remember', 'braddock', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'tired', '?', 'EOS', 'die', 'the', 'hell', 'i', 'thought', 'you']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(62)
tensor(895)
tensor(4)


torch.Size([12])
tensor(1)
tensor(25)
tensor(505)
tensor(24)
tensor(53)
tensor(2243)
tensor(6326)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'i', 'gotta', 'know', 'the', 'truth', 'kevin', '.', 'EOS', 'PAD', 'PAD', 'PAD']
['SOS', 'the', 'truth', 'is', 'for', 'suckers', 'johnny', 'boy', '.', 'EOS', 'PAD']
['SOS', 'okay', 'i', 'like', 'it', '.', 'EOS', '.', 'EOS', '.']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(124)
tensor(7)
tensor(24)
tensor(25)
tensor(387)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'oh', 'you', 'know', 'i', 'am', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'has', 'she', 'called', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'hm', '?', 'EOS', '?', 'EOS', 'shop', 'rocker', '.', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(101)
tensor(3626)
tensor(253)
tensor(254)
tensor(1

torch.Size([12])
tensor(1)
tensor(153)
tensor(25)
tensor(7)
tensor(25)
tensor(214)
tensor(6)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'were', 'i', 'you', 'i', 'alone', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'yes', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'no', '.', 'EOS', '.', 'EOS', '.', 'EOS', '.', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(401)
tensor(177)
tensor(7)
tensor(14)
tensor(1742)
tensor(302)
tensor(36)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
['SOS', 'come', 'on', 'you', 're', 'prettier', 'than', 'that', '.', 'EOS', 'PAD', 'PAD']
['SOS', 'thanks', 'patrick', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'it', 'was', 'a', 'place', 'to', 'place', 'to', 'look', 'start']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(287)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
ten

torch.Size([12])
tensor(1)
tensor(115)
tensor(45)
tensor(12)
tensor(306)
tensor(227)
tensor(6)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'is', 'this', 'a', 'bad', 'time', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'uh', 'no', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'i', 'will', 'seek', 'the', 'fortress', 'for', 'you', '.', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(50)
tensor(14)
tensor(7)
tensor(278)
tensor(558)
tensor(6)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'what', 're', 'you', 'doing', 'later', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'taking', 'my', 'boyfriend', 'to', 'the', 'airport', '.', 'EOS', 'PAD', 'PAD']
['SOS', 'he', 'is', 'one', 'of', 'the', 'great', 'poets', '.', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(12)
tensor(480)
tensor(2)
tensor(0)
tensor

torch.Size([12])
tensor(1)
tensor(198)
tensor(76)
tensor(577)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'make', 'it', 'five', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'seven', 'fifty', '.', 'positively', 'my', 'last', 'offer', '.', 'EOS', 'PAD']
['SOS', 'you', 'should', 'do', 'it', '.', 'EOS', '.', 'EOS', '.']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(25)
tensor(197)
tensor(117)
tensor(24)
tensor(25)
tensor(260)
tensor(25)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'i', 'don', 't', 'know', 'i', 'mean', 'i', 'EOS', 'PAD', 'PAD', 'PAD']
['SOS', 'you', 'wanna', 'lie', 'down', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'i', 'think', 'you', 'know', 'what', 'i', 'mean', '.', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(197)
tensor(117)
tensor(218)
tensor(371)
tensor(4)

torch.Size([12])
tensor(1)
tensor(115)
tensor(45)
tensor(306)
tensor(98)
tensor(174)
tensor(6)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'is', 'this', 'bad', 'for', 'him', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'what', 'do', 'you', 'think', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'you', 'wanna', 'go', 'out', '.', 'EOS', '.', 'EOS', '.']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(50)
tensor(37)
tensor(1966)
tensor(159)
tensor(6)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'what', 's', 'happening', 'here', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'we', 're', 'going', 'for', 'a', 'walk', '.', 'EOS', 'PAD', 'PAD']
['SOS', 'all', 'night', '.', 'EOS', '.', 'EOS', '.', 'EOS', '.']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(571)
tensor(605)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
te

torch.Size([12])
tensor(1)
tensor(147)
tensor(92)
tensor(7)
tensor(6)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'how', 'are', 'you', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'very', 'well', '.', 'thank', 'you', '.', 'EOS', 'PAD', 'PAD', 'PAD']
['SOS', 'oh', 'i', 'could', 'die', '.', '.', '.', '.', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(3179)
tensor(6)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'justin', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'i', 'think', 'i', 'found', 'something', '.', '.', '.', 'EOS', 'PAD']
['SOS', 'just', 'one', 'kiss', '.', 'for', 'old', 'times', 'sake', '.']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(7)
tensor(14)
tensor(1537)
tensor(66)
tensor(2)
tensor(0)
tensor(

torch.Size([12])
tensor(1)
tensor(787)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'hello', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'hi', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'i', 'm', 'waiting', 'for', 'your', 'mother', '.', 'EOS', '.']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(318)
tensor(4)
tensor(379)
tensor(7)
tensor(424)
tensor(83)
tensor(492)
tensor(912)
tensor(6)
tensor(2)
tensor(0)
['SOS', 'yes', '.', 'could', 'you', 'give', 'me', 'some', 'information', '?', 'EOS', 'PAD']
['SOS', 'gladly', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'okay', '.', 'EOS', '.', 'EOS', '.', 'EOS', '.', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(50)
tensor(47)
tensor(7)
tensor(260)
tensor(50)
ten

torch.Size([12])
tensor(1)
tensor(36)
tensor(1449)
tensor(115)
tensor(479)
tensor(572)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'that', 'lady', 'is', 'back', 'sir', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'show', 'her', 'in', '.', 'then', 'go', 'to', 'bed', '.', 'EOS']
['SOS', 'you', 'fucking', 'nuts', '?', 'EOS', '.', 'EOS', '.', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(77)
tensor(115)
tensor(76)
tensor(6)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'who', 'is', 'it', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'it', 's', 'beaumont', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'the', 'man', 'you', 'had', 'the', 'fight', 'with', '.', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(35)
tensor(65)
tensor(389)
tensor(443)
tensor(75)
tensor(

torch.Size([12])
tensor(1)
tensor(111)
tensor(53)
tensor(3432)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'up', 'the', 'coast', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'how', 'far', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'now', 'you', 'need', 'me', '.', 'EOS', '.', 'EOS', '?']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(4508)
tensor(66)
tensor(6)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'gabe', '!', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'what', 'are', 'you', 'doing', 'here', '!', '?', 'EOS', 'PAD', 'PAD']
['SOS', 'mine', '.', 'EOS', '.', 'EOS', '!', 'EOS', '.', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(1640)
tensor(903)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
ten

torch.Size([12])
tensor(1)
tensor(307)
tensor(4842)
tensor(1552)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'try', 'barney', 'clark', '.', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'he', 'is', '.', '.', '.', '?', 'EOS', 'PAD', 'PAD', 'PAD']
['SOS', 'how', 'hard', 'could', 'it', 'on', 'your', 'mother', '.', 'EOS']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(34)
tensor(226)
tensor(76)
tensor(76)
tensor(102)
tensor(1073)
tensor(4)
tensor(2)
tensor(0)
tensor(0)
tensor(0)
['SOS', 'no', 'forget', 'it', 'it', 'was', 'stupid', '.', 'EOS', 'PAD', 'PAD', 'PAD']
['SOS', 'come', 'on', 'what', 'were', 'you', 'going', 'to', 'say', '?', 'EOS']
['SOS', 'looks', 'like', 'you', 'd', 'make', 'a', 'good', 'runner', '.']
--------------------------------------------------------------------------------
torch.Size([12])
tensor(1)
tensor(50)
tensor(6)
tensor(2)
tensor(0)
tensor(0)


torch.Size([12])
tensor(1)
tensor(1502)
tensor(405)
tensor(53)
tensor(301)
tensor(789)
tensor(115)
tensor(96)
tensor(306)
tensor(4292)
tensor(2)
tensor(0)
['SOS', 'hospital', 'says', 'the', 'other', 'kid', 'is', 'in', 'bad', 'shape', 'EOS', 'PAD']
['SOS', 'the', 'shooter', 'local', '?', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['SOS', 'tired', '?', 'EOS', 'to', 'leave', 'you', '.', 'EOS', '.']
--------------------------------------------------------------------------------


In [177]:
src = Variable(torch.LongTensor([[1,2,3,4,5,6,7,8,9,10]]) )
src_mask = Variable(torch.ones(1, 1, 10) )
print(greedy_decode(model, src, src_mask, max_len=10, start_symbol=1))

tensor([[ 1,  2, 10, 10,  9,  5,  3,  2,  9,  5]])


In [None]:
def evaluate(model,sentence,max_length):
    input_tokens=[indexesFromSentence(voc,sentence)]
    input_tokens=torch.LongTensor(input_tokens)
    input_mask=(input_tokens!=0)
    input_tokens=input_tokens.view(1,-1,12)
    input_mask=input_mask.view(-1,12)
    
    output_tokens=greedy_decode(model,input_tokens,input_mask,max_length,1)
    output_tokens=output_tokens.view(-1)
    decoded_words=[voc.index2word[id.item()] for id in output_tokens]
    return decoded_words

def evaluateInput(model,voc,max_length):
    
    input_sentence=''
    
    while(1):
        try:
            input_sentence=input('<')
            if(input_sentence=='q' or input_sentence=='quit'):
                break
            input_sentence=normalizeString(input_sentence)
            output_words=evaluate(model,input_sentence,max_length)
            output_words=[x for x in output_words if not(x=='EOS' or x=='PAD')]
            print('Bot: ',' '.join(output_words))

        except KeyError:
            print("Unknown Words")

    

In [None]:
evaluateInput(model,voc,10)