In [1]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import TransformerDecoder,TransformerDecoderLayer
from torch.nn import TransformerEncoder, TransformerEncoderLayer
from torch.nn import Transformer
import pandas as pd
import torch.optim as optim
import itertools

In [2]:
from functools import lru_cache
import gensim
import gensim.downloader as api
from gensim.models import KeyedVectors
import gensim.utils as utils

import nltk
nltk.download('wordnet')
from nltk.stem import WordNetLemmatizer
cached_lemmatize = lru_cache(maxsize=50000)(WordNetLemmatizer().lemmatize)
from gensim.utils import simple_preprocess, to_unicode

unable to import 'smart_open.gcs', disabling that module
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\mapka\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [3]:
class BiLSTMEncoder(nn.Module):
    def __init__(self, input_dim,emb_dim,enc_hid_dim,dec_hid_dim,dropout=0.5):
        
        super(Encoder,self).__init__()
        
        self.input_dim = input_dim
        self.emb_dim = emb_dim
        self.enc_hid_dim = enc_hid_dim
        self.dec_hid_dim = dec_hid_dim
        self.dropout = dropout
        
        self.embedding = nn.Embedding(input_dim,emb_dim)
        
        self.rnn = nn.LSTM(emb_dim, enc_hid_dim, bidirectional = True)
        
        self.fc = nn.Linear( enc_hid_dim * 2, dec_hid_dim )
        
        self.dropout = nn.Dropout( dropout )
        
    def forward(self, X):
        
        embedded = self.dropout(self.embedding(X))
        
        outputs, hidden = self.rnn(embedded)
        
        hidden = F.tanh( self.fc ( torch.cat( (hidden[-2,:,:], hidden[-1, : , : ] ), dim = 1 ) ) )
        
        return outputs, hidden


        
        

In [4]:
base_dir = "data"
train_file_X = os.path.join(base_dir,"train.source")
train_file_y = os.path.join(base_dir,"train.target")
test_file_X = os.path.join(base_dir,"test.source")
test_file_y = os.path.join(base_dir,"test.target")
val_file_X = os.path.join(base_dir,"val.source")
val_file_y = os.path.join(base_dir,"val.target")


In [5]:
import re
import string

STOP_WORDS = ["i", "a", "about", "an", "are", "as", "at", "be", "by", 
                "for", "from", "how", "in", "is", "it", "of", "on", "or", "that", "the", 
                "this", "to", "was", "what", "when", "where", "who", "will", "with"]

def ExpandContractions(contraction):

    contraction = re.sub(r"won\'t", "will not", contraction)
    contraction = re.sub(r"can\'t", "can not", contraction)

    contraction = re.sub(r"n\'t", " not", contraction)
    contraction = re.sub(r"\'re", " are", contraction)
    contraction = re.sub(r"\'s", " is", contraction)
    contraction = re.sub(r"\'d", " would", contraction)
    contraction = re.sub(r"\'ll", " will", contraction)
    contraction = re.sub(r"\'t", " not", contraction)
    contraction = re.sub(r"\'ve", " have", contraction)
    contraction = re.sub(r"\'m", " am", contraction)

    return contraction

def PreProcess(line):
    
    line = line.translate(str.maketrans("", "", string.punctuation))
    line = ExpandContractions(line)
    line = simple_preprocess(to_unicode(line))
    line = [cached_lemmatize(word) for word in line if word not in STOP_WORDS]

    line = " ".join(line)
    return line

In [6]:
class LineSentenceGenerator(object):

    def __init__(self, source, preprocess=None, max_sentence_length=4000, limit=None, preprocess_flag=True):
        self.source = source
        self.max_sentence_length = max_sentence_length
        self.limit = limit
        self.input_files = []

        if preprocess != None and callable(preprocess) and preprocess_flag:
            self.preprocess = preprocess
        else:
            self.preprocess = lambda line: line.rstrip("\r\n")

        if isinstance(self.source, list):
            print('List of files given as source. Verifying entries and using.')
            self.input_files = [filename for filename in self.source if os.path.isfile(filename)]
            self.input_files.sort()  # makes sure it happens in filename order

        elif os.path.isfile(self.source):
            print('Single file given as source, rather than a list of files. Wrapping in list.')
            self.input_files = [self.source]  # force code compatibility with list of files

        elif os.path.isdir(self.source):
            self.source = os.path.join(self.source, '')  # ensures os-specific slash at end of path
            print('Directory of files given as source. Reading directory %s', self.source)
            self.input_files = os.listdir(self.source)
            self.input_files = [self.source + filename for filename in self.input_files]  # make full paths
            self.input_files.sort()  # makes sure it happens in filename order
        else:  # not a file or a directory, then we can't do anything with it
            raise ValueError('Input is neither a file nor a path nor a list')
        print('Files read into LineSentenceGenerator: %s' % ('\n'.join(self.input_files)))

        self.token_count = 0

    def __iter__(self):
        for file_name in self.input_files:
            print('Reading file %s', file_name)
            with open(file_name, 'rb') as fin:
                for line in itertools.islice(fin, self.limit):
                    line = self.preprocess(utils.to_unicode(line))
                    self.token_count += len(line)
                    i = 0
                    while i < len(line):
                        yield line[i:i + self.max_sentence_length]
                        i += self.max_sentence_length

    def __len__(self):
        if self.token_count > 0:
            return self.token_count
        else:
            return len(self.input_files)

    def __bool__(self):
        return self.has_data()

    def is_empty(self):
        return len(self.input_files) == 0

    def has_data(self):
        return not self.is_empty()

In [7]:
from torchtext.data import Dataset,Example
from torchtext.data import Field, BucketIterator
from torchtext.data.utils import get_tokenizer

SRC = Field(tokenize = get_tokenizer("spacy"),
            init_token = '<sos>',
            eos_token = '<eos>',
            lower = False)

# TRG = Field(tokenize = get_tokenizer("basic_english"),
#             init_token = '<sos>',
#             eos_token = '<eos>',
#             is_target = True,
#             lower = False)

In [8]:
def read_data(X,y,limit=1000):
    examples = []
    fields = {'text-tokens': ('text', SRC),
              'summ-tokens': ('summ', SRC)}
    for i,(x,y) in enumerate(zip(LineSentenceGenerator(X,PreProcess),LineSentenceGenerator(y,PreProcess))):
        if i > limit:
            break
        text_field =x 
        summ_field = y 
       
        e = Example.fromdict({"text-tokens": text_field, "summ-tokens": summ_field},
                             fields=fields)
        examples.append(e)
    print("examples: \n", examples[0])
    return Dataset(examples, fields=[('text', SRC), ('summ', SRC)])

In [9]:
train_data = read_data(train_file_X,train_file_y,1000)
test_data = read_data(test_file_X,test_file_y,200)
val_data = read_data(val_file_X,val_file_y,200)

Single file given as source, rather than a list of files. Wrapping in list.
Files read into LineSentenceGenerator: data\train.source
Single file given as source, rather than a list of files. Wrapping in list.
Files read into LineSentenceGenerator: data\train.target
Reading file %s data\train.source
Reading file %s data\train.target
examples: 
 <torchtext.data.example.Example object at 0x00000282089EB7C8>
Single file given as source, rather than a list of files. Wrapping in list.
Files read into LineSentenceGenerator: data\test.source
Single file given as source, rather than a list of files. Wrapping in list.
Files read into LineSentenceGenerator: data\test.target
Reading file %s data\test.source
Reading file %s data\test.target
examples: 
 <torchtext.data.example.Example object at 0x0000028219345B48>
Single file given as source, rather than a list of files. Wrapping in list.
Files read into LineSentenceGenerator: data\val.source
Single file given as source, rather than a list of files.

In [10]:
print("text: ",train_data[0].text)
print("\ntext-len: ",len(train_data[0].text))
print("\n\nsummary: ",train_data[0].summ)

text:  ['editor', 'note', 'our', 'behind', 'scene', 'series', 'cnn', 'correspondent', 'share', 'their', 'experience', 'covering', 'news', 'and', 'analyze', 'story', 'behind', 'event', 'here', 'soledad', 'obrien', 'take', 'user', 'inside', 'jail', 'many', 'inmate', 'mentally', 'ill', 'inmate', 'housed', 'forgotten', 'floor', 'many', 'mentally', 'ill', 'inmate', 'housed', 'miami', 'before', 'trial', 'miami', 'florida', 'cnn', 'ninth', 'floor', 'miamidade', 'pretrial', 'detention', 'facility', 'dubbed', 'forgotten', 'floor', 'here', 'inmate', 'most', 'severe', 'mental', 'illness', 'incarcerated', 'until', 'they', 're', 'ready', 'appear', 'court', 'most', 'often', 'they', 'face', 'drug', 'charge', 'charge', 'assaulting', 'officer', 'charge', 'judge', 'steven', 'leifman', 'say', 'usually', 'avoidable', 'felony', 'he', 'say', 'arrest', 'often', 'result', 'confrontation', 'police', 'mentally', 'ill', 'people', 'often', 'wo', 'nt', 'do', 'they', 're', 'told', 'police', 'arrive', 'scene', 'conf

In [11]:
train_data.fields

{'text': <torchtext.data.field.Field at 0x282089830c8>,
 'summ': <torchtext.data.field.Field at 0x282089830c8>}

In [12]:
print("text: ", test_data[100].text)
print("\n\nsumm: ",test_data[100].summ)

text:  ['cnna', 'frenchlanguage', 'global', 'television', 'network', 'regained', 'control', 'one', 'it', 'channel', 'thursday', 'after', 'cyberattack', 'day', 'earlier', 'crippled', 'it', 'broadcast', 'and', 'social', 'medium', 'account', 'television', 'network', 'tv', 'monde', 'gradually', 'regaining', 'control', 'it', 'channel', 'and', 'social', 'medium', 'outlet', 'after', 'suffering', 'network', 'director', 'called', 'extremely', 'powerful', 'cyberattack', 'addition', 'it', 'channel', 'tv', 'monde', 'lost', 'control', 'it', 'social', 'medium', 'outlet', 'and', 'it', 'website', 'director', 'yves', 'bigot', 'said', 'video', 'message', 'posted', 'later', 'facebook', 'mobile', 'site', 'which', 'still', 'active', 'network', 'said', 'hacked', 'islamist', 'group', 'isi', 'logo', 'and', 'marking', 'appeared', 'tv', 'monde', 'social', 'medium', 'account', 'but', 'there', 'no', 'immediate', 'claim', 'responsibility', 'isi', 'any', 'other', 'group', 'day', 'broke', 'thursday', 'europe', 'netw

In [13]:
SRC.build_vocab(train_data.text, min_freq = 2)
# TRG.build_vocab(train_data.summ, min_freq = 2)

In [14]:
len(SRC.vocab.itos)

14744

In [15]:
device = torch.device('cpu')

BATCH_SIZE = 128

train_iter = BucketIterator(train_data,BATCH_SIZE, shuffle=True,
                                                 sort_key=lambda x: len(x.text), sort_within_batch=True)

val_iter = BucketIterator(val_data, BATCH_SIZE, sort_key=lambda x: len(x.text), sort_within_batch=True)
test_iter = BucketIterator(test_data,BATCH_SIZE, sort_key=lambda x: len(x.text), sort_within_batch=True)

In [16]:
for batch in train_iter:
    print(batch.text.shape,"\n\n")
    x = batch.text
    print(x)
#     print(batch.text[:-1,:].shape)
    print("text: ",[src_list[i] for i in x.squeeze(1).transpose(0,1)[0].tolist()])
    y = batch.summ
    print("\nsumm: ",[src_list[i] for i in y.squeeze(1).transpose(0,1)[0].tolist()])
    
    

torch.Size([450, 128]) 


tensor([[   2,    2,    2,  ...,    2,    2,    2],
        [2825,  313,  252,  ...,   22,   22,   22],
        [ 335,  436,   22,  ..., 5436,   85, 5613],
        ...,
        [   6,   42,    1,  ...,    1,    1,    1],
        [   5,    3,    1,  ...,    1,    1,    1],
        [   3,    1,    1,  ...,    1,    1,    1]])


NameError: name 'src_list' is not defined

In [17]:
class PositionalEncoding(nn.Module):

    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)

In [18]:
import math
class TransformerSummarizer(nn.Module):
    def __init__(self, nhead, num_encoder_layers, num_decoder_layers, dim_feedforward, max_seq_length,vocab_size, pad_idx,  d_model=None, pos_dropout =0.1, trans_dropout= 0.1,embeddings=None):
        super().__init__()
       
        if embeddings is None:
            self.embed_src = nn.Embedding(vocab_size, d_model)
            self.embed_tgt = nn.Embedding(vocab_size, d_model)
        else:
            d_model = embeddings.size(1)
            self.d_model = embeddings.size(1)
            self.embed_src = nn.Embedding(*embeddings.shape)
            self.embed_src.weight = nn.Parameter(embeddings,requires_grad=False)
            
            self.embed_tgt = nn.Embedding(*embeddings.shape)
            self.embed_tgt.weight = nn.Parameter(embeddings,requires_grad=False)
        
        
        self.pos_enc = PositionalEncoding(d_model, pos_dropout, max_seq_length)

        self.transformer = nn.Transformer(d_model, nhead, num_encoder_layers, num_decoder_layers, dim_feedforward, trans_dropout)
        
        self.fc = nn.Linear(d_model, vocab_size)
        
        self.pad_idx = pad_idx
        
        self.src_mask = None
        self.tgt_mask = None
        self.memory_mask = None
        
    def generate_square_mask(self,sz):
        mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
        mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
        return mask
    
    def make_pad_mask(self,seq,pad_idx):
        mask = (seq == pad_idx).transpose(0,1)
        return mask
    

    def forward(self, src, tgt):
        if self.tgt_mask is None or self.tgt_mask.size(0) != len(trg):
            self.trg_mask = self.generate_square_mask(len(trg)).to(trg.device)
        
#         print("Before Embed: ",src.shape,tgt.shape,sep="\n")
        
        src_pad_mask = self.make_pad_mask(src,self.pad_idx)
        tgt_pad_mask = self.make_pad_mask(tgt,self.pad_idx)
        
        print("src_pad_mask: ",src_pad_mask,"\n *****DONE****")
        print("trg_pad_mask: ",tgt_pad_mask,"\n *****DONE****")
        

        
        
        src = self.pos_enc(self.embed_src(src) * math.sqrt(self.d_model))

        tgt = self.pos_enc(self.embed_tgt(tgt) * math.sqrt(self.d_model))
        print(tgt.shape)
        

        output = self.transformer(src, tgt, src_mask=self.src_mask, tgt_mask=self.tgt_mask, memory_mask=self.memory_mask, 
                                 src_key_padding_mask=src_pad_mask, tgt_key_padding_mask=tgt_pad_mask, memory_key_padding_mask=src_pad_mask)
        
        return self.fc(output)
        
        
        

        
        

In [19]:
# TGT_PAD_IDX = TRG.vocab.stoi[TRG.pad_token]
PAD_IDX = SRC.vocab.stoi[SRC.pad_token]

criterion = nn.CrossEntropyLoss(ignore_index=PAD_IDX)

In [20]:
# trg = len(TRG.vocab)
# EMB_DIM = 200
SEQ_LEN = 4000

D_MODEL = 200 #embedding_size
DIM_FEEDFORWARD = 300
VOCAB_SIZE = len(SRC.vocab)
print(VOCAB_SIZE)
ATTENTION_HEADS = 6
N_LAYERS = 1



# vocab_size, d_model, nhead, num_encoder_layers, num_decoder_layers, dim_feedforward, max_seq_length, pos_dropout, trans_dropout

14744


In [21]:
from torchtext.vocab import FastText

ff = FastText("en")

In [22]:
embeddings = ff.vectors

In [23]:
embeddings =  ff.get_vecs_by_tokens(SRC.vocab.itos)

In [24]:
embeddings.shape

torch.Size([14744, 300])

In [28]:
model = TransformerSummarizer( ATTENTION_HEADS,N_LAYERS, N_LAYERS, DIM_FEEDFORWARD, SEQ_LEN,VOCAB_SIZE,PAD_IDX,embeddings=embeddings).to(device)

In [27]:
model

TransformerSummarizer(
  (embed_src): Embedding(14744, 300)
  (embed_tgt): Embedding(14744, 300)
  (pos_enc): PositionalEncoding(
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (transformer): Transformer(
    (encoder): TransformerEncoder(
      (layers): ModuleList(
        (0): TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): Linear(in_features=300, out_features=300, bias=True)
          )
          (linear1): Linear(in_features=300, out_features=300, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
          (linear2): Linear(in_features=300, out_features=300, bias=True)
          (norm1): LayerNorm((300,), eps=1e-05, elementwise_affine=True)
          (norm2): LayerNorm((300,), eps=1e-05, elementwise_affine=True)
          (dropout1): Dropout(p=0.1, inplace=False)
          (dropout2): Dropout(p=0.1, inplace=False)
        )
      )
      (norm): LayerNorm((300,), eps=1e-05, elementwise_affine=True)
    )
    (decoder): Tr

In [29]:
import math
import time
from tqdm.notebook import tqdm_notebook as tqdm

def train(model: nn.Module,
          iterator: BucketIterator,
          num_batches: int,
          optimizer: optim.Optimizer,
          criterion: nn.Module,
          clip: float):
    
    print("Training......")

    model.train()

    epoch_loss = 0

    for batch in tqdm(iterator,total=num_batches):
        
#         if i == 1:
#             break

        src = batch.text
        trg = batch.summ
        
#         tgt_inp, tgt_out = tgt[:, :-1], tgt[:, 1:]
#         tgt_mask = gen_nopeek_mask(tgt_inp.shape[1]).to('cuda')

#         trg_inp = trg[:,:-1] 

        trg_inp, trg_out = trg[:-1, :], trg[1:, :]

        optimizer.zero_grad()

        output = model(src.to(device), trg_inp.to(device))
    
        output = output.view(-1, output.shape[-1])

        loss = criterion(output, trg_out.view(-1))

        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)

        optimizer.step()

        epoch_loss += loss.item()
        
    print("Training Done.....")

    return epoch_loss / len(iterator)

In [30]:
def evaluate(model: nn.Module,
             iterator: BucketIterator,
             num_batches:int,
             criterion: nn.Module):

    model.eval()

    epoch_loss = 0
    
    print("Evaluating....")
    
    with torch.no_grad():

        for batch in tqdm(enumerate(iterator),total=num_batches):
            
            if i == 1:
                break
            src = batch.text
            trg = batch.summ
        
#             trg_inp, trg_out = trg[:-1, :], trg[1:, :]

            output = model(src.to(device), trg_inp.to(device))

            output = output.view(-1,output.shape[-1])

            loss = criterion(output, trg[1:,:].view(-1))

            epoch_loss += loss.item()
            
        print("Evaluating Done........")

    return epoch_loss / len(iterator)

In [31]:
src_list = SRC.vocab.itos
src_dict = SRC.vocab.stoi


In [36]:
from einops import rearrange
for i,batch in enumerate(train_iter):
    if i == 1:
        break
    src = batch.text
    trg = batch.summ
    print(src.shape)
    print(trg.shape)
    trg_inp, trg_out = trg[:-1, :], trg[1:, :]
    
    
    print("text: ",[src_list[i] for i in src.squeeze(1).transpose(0,1)[0].tolist()])
    
    print("\n\nsumm: ",[src_list[i] for i in trg.squeeze(1).transpose(0,1)[0].tolist()])
    
    
    
    
    
    print('trg_inp: ',trg_inp.shape)
    print('trg_out: ',trg_out.shape)
    
    memory = model.transformer.encoder(model.pos_enc(model.embed_src(src) * math.sqrt(model.d_model)))
        
    out = model.fc(model.transformer.decoder(model.pos_enc(model.embed_tgt(trg) * math.sqrt(model.d_model)), memory))

#     out = model(src.to(device),trg_inp.to(device))
    
#     print(f'out: {out.shape}')
#     print(rearrange(out,'t b e -> (b t) e ').shape)
    
#     print(rearrange(out,'t b e -> (b t) e ').shape)
    
#     print(rearrange(trg_out, 'o b -> (b o)').shape)
    
    out_  = rearrange(out,'t b e -> b t e')
    
    print("out:\n",out,"\n********Done*******")
    
    print("\n\nargmax: ",out_.argmax(2)[0].tolist())
    
    l = out_.argmax(2)[0].tolist()
    
    print([src_list[i] for i in l])
    
    output_dim = out.shape[-1]
    
    print("view adjusted: \n",out.view(-1,output_dim),"\n*******Done********")
    
    print("view-adjusted-shape: ",out.view(-1,output_dim).shape)
    
    loss = criterion(out.view(-1, output_dim), trg_out.view(-1))
    
    print(loss.item())
    
    
    
#     del src,trg,out
#     torch.cuda.empty_cache()

# del batch

torch.Size([215, 128])
torch.Size([46, 128])
text:  ['<sos>', 'london', 'england', 'milan', 'goalkeeper', 'dida', 'ha', 'been', 'cleared', 'play', 'next', 'month', 'champion', 'league', 'match', 'shakhtar', 'donetsk', 'after', 'partially', 'winning', 'his', 'appeal', 'uefa', 'against', 'twomatch', 'ban', 'dida', 'ha', 'had', 'one', 'game', 'his', 'twomatch', 'ban', 'suspended', 'year', 'following', 'appeal', 'uefa', 'brazilian', 'dida', 'also', 'fined', 'swiss', 'franc', 'european', 'football', 'ruling', 'body', 'following', 'incident', 'involving', 'supporter', 'during', 'champion', 'clash', 'against', 'celtic', 'scotland', 'october', 'yearold', 'brazilian', 'initially', 'banned', 'two', 'game', 'his', '<unk>', 'following', 'celtic', 'fan', 'encroachment', 'onto', 'pitch', 'during', 'defeat', 'celtic', 'park', 'following', 'monday', 'appeal', 'hearing', 'dida', 'suspended', 'two', 'match', 'but', 'one', 'match', 'now', 'deferred', '<unk>', 'period', 'one', 'year', 'said', '<unk>', 'ue

ValueError: Expected input batch_size (5888) to match target batch_size (5760).

In [54]:
x = torch.zeros(44,127)

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])

In [143]:

# Running too long
# need to fix this

def epoch_time(start_time: int,
               end_time: int):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

parameters = filter(lambda p:p.requires_grad, model.parameters())
optimizer = optim.Adam(parameters)
num_batches = math.ceil(len(train_data)/BATCH_SIZE)
val_batches = math.ceil(len(val_data)/BATCH_SIZE)

N_EPOCHS = 1
CLIP = 1

best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):

    start_time = time.time()

    train_loss = train(model, train_iter, num_batches,optimizer, criterion, CLIP)
    valid_loss = evaluate(model, val_iter,val_batches, criterion)

    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f}')
    print(f'\t Val. Loss: {valid_loss:.3f}')
    
test_size = math.ceil(len(test_data)/BATCH_SIZE)
test_loss = evaluate(model, test_iter,test_size, criterion)

print(f'| Test Loss: {test_loss:.3f}')

Training......


HBox(children=(FloatProgress(value=0.0, max=8.0), HTML(value='')))

text:  ['<sos>', 'cnn', 'national', 'football', 'league', 'ha', 'indefinitely', 'suspended', 'atlanta', 'falcon', 'quarterback', 'michael', 'vick', 'without', 'pay', 'official', 'league', 'said', 'friday', 'nfl', 'star', 'michael', 'vick', 'set', 'appear', 'court', 'monday', 'judge', 'have', 'final', 'say', 'plea', 'deal', 'earlier', 'vick', 'admitted', 'participating', 'dogfighting', 'ring', 'part', 'plea', 'agreement', 'federal', 'prosecutor', 'virginia', 'your', 'admitted', 'conduct', 'not', 'only', 'illegal', 'but', 'also', 'cruel', 'and', 'reprehensible', 'your', 'team', 'nfl', 'and', 'nfl', 'fan', 'have', 'all', 'been', 'hurt', 'your', 'action', 'nfl', 'commissioner', 'roger', 'goodell', 'said', 'letter', 'vick', 'goodell', 'said', 'he', 'would', 'review', 'status', 'suspension', 'after', 'legal', 'proceeding', 'over', 'paper', 'filed', 'friday', 'federal', 'court', 'virginia', 'vick', 'also', 'admitted', 'he', 'and', 'two', 'coconspirator', 'killed', 'dog', 'did', 'not', 'fight'

ValueError: Expected input batch_size (5760) to match target batch_size (5632).

In [41]:
idx_to_text(s):
    return [trg_list[]]

2

In [83]:
x =torch.tensor([[1,0,0],[0,1,0],[0,0,1]])

In [96]:
m1 =(x!=0).unsqueeze(1).unsqueeze(2)

3

In [100]:
m2= torch.tril(torch.ones((x.size(0), x.size(0)), device =device)).bool()

In [94]:
m2

tensor([[0., -inf, -inf],
        [0., 0., -inf],
        [0., 0., 0.]])

In [97]:
m1

tensor([[[[ True, False, False]]],


        [[[False,  True, False]]],


        [[[False, False,  True]]]])

In [102]:
(m1 & m2).shape

torch.Size([3, 1, 3, 3])

In [41]:
s = ''

for i,line in enumerate(LineSentenceGenerator(train_file_X,PreProcess)):
    if i == 1:
        break
    s = line
s

Single file given as source, rather than a list of files. Wrapping in list.
Files read into LineSentenceGenerator: data\train.source
Reading file %s data\train.source


'editor note our behind scene series cnn correspondent share their experience covering news and analyze story behind event here soledad obrien take user inside jail many inmate mentally ill inmate housed forgotten floor many mentally ill inmate housed miami before trial miami florida cnn ninth floor miamidade pretrial detention facility dubbed forgotten floor here inmate most severe mental illness incarcerated until theyre ready appear court most often they face drug charge charge assaulting officer charge judge steven leifman say usually avoidable felony he say arrest often result confrontation police mentally ill people often wont do theyre told police arrive scene confrontation seems exacerbate their illness and they become more paranoid delusional and le likely follow direction according leifman so they end up ninth floor severely mentally disturbed but not getting any real help because theyre jail we toured jail leifman he well known miami advocate justice and mentally ill even th

In [42]:
def make_sequence(s):
    

tensor([[    2, 11957,     3],
        [    2,  5099,     3],
        [    2,     0,     3],
        ...,
        [    2, 11957,     3],
        [    2, 13191,     3],
        [    2,  5099,     3]])

In [56]:
x.transpose(0,1).unsqueeze(1).shape

torch.Size([3115, 1, 3])

In [51]:
x.shape

torch.Size([3, 3115])