In [1]:
import torch
import numpy

def get_pad_mask(seq, pad_idx):
    """Mark the non-padding positions of ``seq``.

    Args:
        seq: tensor of token ids.
        pad_idx: the id that denotes padding.

    Returns:
        A boolean tensor that is True wherever ``seq`` differs from
        ``pad_idx``, with an extra size-1 dimension inserted before the
        last one (e.g. a (3, 3) input yields a (3, 1, 3) mask).
    """
    is_real_token = seq.ne(pad_idx)
    # The singleton axis goes in second-to-last position.
    return is_real_token.unsqueeze(-2)

def get_subsequent_mask(seq):
    """Build a lower-triangular mask that hides later positions.

    Args:
        seq: a 2-D tensor; only its second dimension (the length) and its
            device are used.

    Returns:
        A boolean tensor of shape (1, len, len), created on ``seq.device``,
        where entry [0, i, j] is True when j <= i and False otherwise.
    """
    # Unpacking into two names requires seq to be exactly 2-D.
    _, seq_len = seq.size()
    all_ones = torch.ones((1, seq_len, seq_len), device=seq.device)
    # Keeping the diagonal and everything below it is the complement of the
    # strictly-upper triangle.
    return torch.tril(all_ones).bool()

# Small batch of token ids; 0 is treated as the padding id below.
X = torch.tensor([[1, 2, 3], [0, 3, 4], [1, 0, 3]])
Y = get_pad_mask(X, 0)
Z = get_subsequent_mask(X)

# Show each tensor followed by its shape, one item per line.
print(X, X.shape, sep="\n")
print(Y, Y.shape, sep="\n")
print(Z, Z.shape, sep="\n")

tensor([[1, 2, 3],
        [0, 3, 4],
        [1, 0, 3]])
torch.Size([3, 3])
tensor([[[ True,  True,  True]],

        [[False,  True,  True]],

        [[ True, False,  True]]])
torch.Size([3, 1, 3])
tensor([[[ True, False, False],
         [ True,  True, False],
         [ True,  True,  True]]])
torch.Size([1, 3, 3])


In [2]:
X = torch.rand((16, 256, 128))
print("X.shape: ", X.shape)
# Split along dim 1 into one (16, 128) slice per index, dropping that dim.
X_steps = list(X.unbind(1))
print("X_steps size:", len(X_steps))
first_step = X_steps[0]
print("X_steps[0[.shape: ", first_step.shape)
print("X_steps[0]: ", first_step)

X.shape:  torch.Size([16, 256, 128])
X_steps size: 256
X_steps[0[.shape:  torch.Size([16, 128])
X_steps[0]:  tensor([[0.7357, 0.8787, 0.5362,  ..., 0.9408, 0.4598, 0.4150],
        [0.0955, 0.8765, 0.0617,  ..., 0.7000, 0.6588, 0.8330],
        [0.7069, 0.3405, 0.1005,  ..., 0.5733, 0.8304, 0.2003],
        ...,
        [0.3625, 0.0768, 0.6233,  ..., 0.0704, 0.6336, 0.4173],
        [0.0625, 0.3860, 0.9970,  ..., 0.3127, 0.6703, 0.9729],
        [0.4361, 0.4837, 0.7813,  ..., 0.5262, 0.5615, 0.4900]])


In [3]:
from BagOfWords import *
# Fit the vectorizer and vocabulary on three short example sentences.
vectorizer, vocab, vocab_size = build_vectorizer_and_vocab(
    [
        'Game of Thrones is an amazing tv series!',
        'Game of Thrones is the best tv series!',
        'Game of Thrones is so great',
    ]
)

# Encode a sentence that repeats "an" several times.
print("bag_of_words_presentation:")
print(
    bag_of_words_presentation(
        'Game of Thrones is an great tv series! an an an',
        vectorizer,
        vocab_size,
    )
)

# Dump the word -> index mapping, then look up a single word's index.
print("vocabulary:", vocab, vocab['thrones'], sep="\n")

bag_of_words_presentation:
tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0., 1., 0., 0., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0., 1., 0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0., 1., 1., 0., 0., 0., 1., 0.],
        [0., 1., 0., 1., 0., 1., 1., 0., 0., 0., 1., 0.],
        [0., 1., 0., 1., 1., 1., 1., 0., 0., 0., 1., 0.],
        [0., 1., 0., 1., 1., 1., 1., 0., 0., 0., 1., 1.],
        [0., 1., 0., 1., 1., 1., 1., 1., 0., 0., 1., 1.],
        [0., 2., 0., 1., 1., 1., 1., 1., 0., 0., 1., 1.],
        [0., 3., 0., 1., 1., 1., 1., 1., 0., 0., 1., 1.],
        [0., 4., 0., 1., 1., 1., 1., 1., 0., 0., 1., 1.]])
vocabulary:
{'game': 3, 'of': 6, 'thrones': 10, 'is': 5, 'an': 1, 'amazing': 0, 'tv': 11, 'series': 7, 'the': 9, 'best': 2, 'so': 8, 'great': 4}
10
