In [1]:
import torch
import torch.nn as nn
from torch.nn import TransformerEncoder, TransformerEncoderLayer
import io
import math

In [2]:
EMBEDDING_SIZE = 2
input_text = 'text.txt'

char_to_token = {}
token_to_char = {}
def load_char_to_token():
    global char_to_token
    with io.open(input_text, 'r', encoding='utf-8') as f:
        while True:
            c = f.read(1)
            if not c:
                break
            
            if c not in char_to_token:
                next_token = len(char_to_token)
                char_to_token[c] = next_token
                token_to_char[next_token] = c
load_char_to_token()

def tokens_from_file(path):
    tokens = []
    with io.open(input_text, 'r', encoding='utf-8') as f:
        tokens = [[char_to_token[c] for c in f.read()]]
    return torch.LongTensor(tokens)

def tokens_to_string(tokens):
    if isinstance(tokens, torch.Tensor):
        return ''.join([token_to_char[t.item()] for t in tokens.squeeze(0)])
    else:
        return ''.join([token_to_char[t] for t in tokens])

_embedding = nn.Embedding(len(char_to_token), EMBEDDING_SIZE)
def get_embedding_from_str(in_str):
    tokens = [[char_to_token[c]] for c in in_str]
    return _embedding(torch.LongTensor(tokens))

def get_embedding(tensor):
    return _embedding(tensor)

In [3]:
tokens_from_file(input_text)

tensor([[ 0,  0,  0,  0,  1,  2,  3,  4,  5,  6,  7,  1,  5,  6,  7,  8,  7,  9,
         10,  9, 11, 12,  5,  8, 13,  7, 11, 14,  7,  4, 11, 15,  7,  9, 11,  7,
          9, 12,  8,  5, 14,  7,  8,  7,  6, 16, 17, 10, 16, 14, 18, 16, 19,  9,
         11, 19,  6, 16, 17, 10, 16, 14, 18, 16,  7, 20, 11, 21, 16, 13,  7,  9,
          4,  8,  9,  7, 10,  6, 16,  6,  7,  9,  4, 16,  7, 14, 14, 22,  3, 12,
          8, 14,  6, 23, 11, 12, 20, 16, 12,  7, 20, 11, 21, 10, 13, 16, 22, 24,
         24, 25, 26,  3, 11, 12, 18,  4,  7, 27, 22, 28,  7, 12, 16, 13, 16,  8,
          6, 16,  7,  5, 14, 18, 13, 10, 21, 16,  6,  7,  8,  7,  6,  9,  8, 14,
         21,  8, 12, 21,  7,  9, 12,  8, 14,  6, 23, 11, 12, 20, 16, 12,  7, 20,
         11, 21, 10, 13, 16,  7, 29,  8,  6, 16, 21,  7, 11, 14,  7,  9,  4, 16,
          7, 30,  8, 30, 16, 12,  7, 31,  9,  9, 16, 14,  9,  5, 11, 14,  7,  5,
          6,  7, 31, 13, 13,  7, 32, 11, 10,  7, 33, 16, 16, 21, 22,  7,  3,  4,
         16,  7,  9, 12,  8,

In [4]:
tokens_to_string([1, 2, 3, 4, 23])

'™😀Thf'

In [5]:
t = tokens_from_file(input_text)
tokens_to_string(t)

'\x00\x00\x00\x00™😀This ™is a tutorial on how to train a sequence-to-sequence model that uses the nn.Transformer module.\n\nPyTorch 1.2 release includes a standard transformer module based on the paper Attention is All You Need. The transformer model has been proved to be superior in quality for many sequence-to-sequence problems while being more parallelizable. The nn.Transformer module relies entirely on an attention mechanism (another module recently implemented as nn.MultiheadAttention) to draw global dependencies between input and output. The nn.Transformer module is now highly modularized such that a single component (like nn.TransformerEncoder in this tutorial) can be easily adapted/composed.\n\n\n\n\n😀\ngrinning face\nUnicode: U+1F600, UTF-8: F0 9F 98 80'

In [6]:
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)

In [51]:
# input_x = get_embedding_from_str('    ')
input_x = get_embedding(tokens_from_file(input_text))
print(input_x.size())
input_embedding = input_x * math.sqrt(EMBEDDING_SIZE)
pe = PositionalEncoding(EMBEDDING_SIZE)
positional_embedding = pe(input_embedding)

print(positional_embedding)
print(positional_embedding.size())

torch.Size([1, 750, 2])
tensor([[[ 2.6182, -0.2332],
         [ 2.6182, -0.2332],
         [ 2.6182, -0.2332],
         ...,
         [-0.0075,  4.4766],
         [ 0.6296,  1.6758],
         [-1.1378, -2.7070]]], grad_fn=<MulBackward0>)
torch.Size([1, 750, 2])


In [53]:
sz = 1
src_mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
src_mask = src_mask.float().masked_fill(src_mask == 0, float('-inf')).masked_fill(src_mask == 1, float(0.0))
src_mask

tensor([[0.]])

In [60]:
print(positional_embedding.size())
print(len(char_to_token))

torch.Size([1, 750, 2])
52


In [61]:
nlayers = 1
nhead = 1
nhid = 1
dropout = 0
encoder_layers = TransformerEncoderLayer(EMBEDDING_SIZE, nhead, nhid, dropout)
transformer_encoder = TransformerEncoder(encoder_layers, nlayers)

output = transformer_encoder(positional_embedding, src_mask)  # src_mask ?
decoder = nn.Linear(EMBEDDING_SIZE, len(char_to_token))
output = decoder(output)
output

tensor([[[ 0.1385, -0.2167, -0.3392,  ...,  0.8722, -0.3404, -0.2313],
         [ 0.1385, -0.2167, -0.3392,  ...,  0.8722, -0.3404, -0.2313],
         [ 0.1385, -0.2167, -0.3392,  ...,  0.8722, -0.3404, -0.2313],
         ...,
         [-1.0016,  0.4868,  0.9653,  ...,  0.4545, -0.2234,  1.4450],
         [-1.0016,  0.4868,  0.9653,  ...,  0.4545, -0.2234,  1.4450],
         [ 0.1385, -0.2167, -0.3392,  ...,  0.8722, -0.3404, -0.2313]]],
       grad_fn=<AddBackward0>)

In [None]:
char_to_token['d']

In [None]:
a = torch.randn(10)

In [None]:
import torch.nn as nn

In [None]:
dictionary = 'abcdefghiklmnopqrstuvwxyz '
def tokenize(s):
    return [dictionary.index(c) for c in s]
embed = nn.Embedding(len(dictionary), 3)
embed(torch.LongTensor(tokenize('hello world')))

In [None]:
embed(torch.LongTensor([10]))

In [None]:
embed.weight

In [None]:
torch.LongTensor([1, 2])

In [None]:
torch.LongTensor(1)

In [None]:
dictionary.index('z')

In [None]:
import math
max_len = 8
d_model = 4
pe = torch.zeros(max_len, d_model)
position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
pe[:, 0::2] = torch.sin(position * div_term)
pe[:, 1::2] = torch.cos(position * div_term)
pe = pe.unsqueeze(0).transpose(0, 1)
pe