# Seminar 4

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

## LSTM

(Simple example, will work more during next seminars)

Input: $(L,N,H_{in})$

$N$ - batch size

$L$ - maximum length

$H_{in}$ - size of an input vector (dimension of the vector for every word)

In [None]:
# Random input with L=4 time steps, N=1 sequence in the batch and H_in=3 features per step.
# NOTE(review): no seed is set before this call, so the values differ on every run.
inputs = torch.randn(4, 1, 3)
inputs

tensor([[[ 1.2960, -0.9720, -0.3814]],

        [[-0.4368, -0.6785, -0.3087]],

        [[-0.5490, -0.9849, -1.2383]],

        [[ 1.1298,  1.4889, -0.4294]]])

In [None]:
# Shape is (L, N, H_in).
inputs.shape

torch.Size([4, 1, 3])

LSTM block in pytorch

In [None]:
# Single-layer LSTM that maps 3-dimensional inputs to a 5-dimensional hidden state.
lstm = nn.LSTM(input_size = 3,
               hidden_size = 5,
               num_layers = 1)

First input:

In [None]:
# No initial (h, c) is passed here. `out` holds the output for every time step,
# `hidden` is the final (h, c) pair.
out, hidden = lstm(inputs)

In [None]:
# Shape is (L, N, H_out): one 5-dimensional output per time step.
out.shape

torch.Size([4, 1, 5])

Hidden contains h and c; each of them has shape $(D \times num\_layers, N, H_{out})$

$D$ is the number of directions (here is 1)

$num\_layers$ is the number of layers (here is 1)


In [None]:
# Unpack the final hidden state h and cell state c.
h, c = hidden

In [None]:
# Both have shape (D * num_layers, N, H_out) = (1, 1, 5).
h.shape, c.shape

(torch.Size([1, 1, 5]), torch.Size([1, 1, 5]))

If we already have hidden states, we can pass them to the LSTM together with the input

In [None]:
# Second LSTM that consumes the outputs of the first one (size 5).
lstm1 = nn.LSTM(input_size = 5,
               hidden_size = 5,
               num_layers = 1)
# Start from the final (h, c) of the first LSTM instead of the default initial state.
out1, hidden1 = lstm1(out, hidden)

In [None]:
out1.shape

torch.Size([4, 1, 5])

In [None]:
# Final hidden and cell state of the second LSTM; same shapes as for the first one.
h1, c1 = hidden1
h1.shape, c1.shape

(torch.Size([1, 1, 5]), torch.Size([1, 1, 5]))

Build a model for Part-of-speech tagging

Prepare the data. Models do not take words/tokens directly; they take their indices (the same tokenizer/vocabulary must be used for train and test so that the indices match)

In [None]:
def prepare_sequence(seq, to_ix):
    """Convert a sequence of tokens into a 1-D tensor of integer indices.

    Args:
        seq: iterable of tokens (words or tags).
        to_ix: mapping from token to its integer index.

    Returns:
        A ``torch.long`` tensor with one index per token, in the order of ``seq``.

    Raises:
        KeyError: if a token of ``seq`` is missing from ``to_ix``.
    """
    indices = []
    for token in seq:
        indices.append(to_ix[token])
    return torch.tensor(indices, dtype=torch.long)


# Toy corpus: each item is (list of words, list of part-of-speech tags of the same length).
training_data = [
    ("The dog ate the apple".split(), ["DET", "NN", "V", "DET", "NN"]),
    ("Everybody read that book".split(), ["NN", "V", "DET", "NN"])
]

# Vocabulary: every distinct word gets the next free index, in order of first appearance.
word_to_ix = {}
for sent, tags in training_data:
    for word in sent:
        # setdefault inserts only unseen words; len() is evaluated before the insertion.
        word_to_ix.setdefault(word, len(word_to_ix))
print(word_to_ix)

# Tag vocabulary and its inverse (index -> tag), used to decode predictions.
tag_to_ix = {"DET": 0, "NN": 1, "V": 2}
tag_to_ix_rev = {ix: tag for tag, ix in tag_to_ix.items()}
print(tag_to_ix)


{'The': 0, 'dog': 1, 'ate': 2, 'the': 3, 'apple': 4, 'Everybody': 5, 'read': 6, 'that': 7, 'book': 8}
{'DET': 0, 'NN': 1, 'V': 2}


In [None]:
# Encode the first training sentence and its tags as index tensors.
sentence_in = prepare_sequence(training_data[0][0], word_to_ix)
targets = prepare_sequence(training_data[0][1], tag_to_ix)
sentence_in, targets

(tensor([0, 1, 2, 3, 4]), tensor([0, 1, 2, 0, 1]))

Define the model:

In [None]:
EMBEDDING_DIM = 3
HIDDEN_DIM = 3


class LSTMTagger(nn.Module):
    """Part-of-speech tagger: embedding -> single LSTM -> linear layer -> log-softmax.

    Args:
        embedding_dim: size of the word embedding vectors.
        hidden_dim: size of the LSTM hidden state.
        vocab_size: number of distinct words (rows of the embedding table).
        tagset_size: number of distinct tags (size of the output layer).
    """

    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        super().__init__()
        self.hidden_dim = hidden_dim

        # Word index -> dense vector.
        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
        # Takes the embeddings and produces one hidden vector per word.
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)
        # Projects every hidden vector onto the tag space.
        self.hidden2tag = nn.Linear(hidden_dim, tagset_size)

    def forward(self, sentence):
        """Return per-word log-probabilities over the tags.

        Args:
            sentence: 1-D tensor of word indices.

        Returns:
            Tensor of shape (len(sentence), tagset_size) holding log-softmax scores.
        """
        seq_len = len(sentence)
        embeds = self.word_embeddings(sentence)
        # The LSTM expects (L, N, H_in); a single sentence is treated as a batch of size 1.
        lstm_in = embeds.view(seq_len, 1, -1)
        # The final hidden state is discarded, only the per-step outputs are used.
        lstm_out, _ = self.lstm(lstm_in)
        # Drop the batch dimension again before the linear layer.
        tag_space = self.hidden2tag(lstm_out.view(seq_len, -1))
        return F.log_softmax(tag_space, dim=1)

In [None]:
# Fix the RNG seed so that the parameter initialisation is reproducible.
torch.manual_seed(0)
model = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, len(word_to_ix), len(tag_to_ix))
# NLLLoss is applied to the log-softmax scores returned by LSTMTagger.forward.
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

Untrained model: for every word of the first sentence it returns the log-probabilities of the tags

In [None]:
# Forward pass only, so gradient tracking is switched off.
with torch.no_grad():
    # NOTE: this rebinds `inputs`, which earlier held the random LSTM demo tensor.
    inputs = prepare_sequence(training_data[0][0], word_to_ix)
    # One row per word, one column per tag (log-softmax scores).
    tag_scores = model(inputs)
    print(tag_scores)

tensor([[-0.9950, -1.0767, -1.2395],
        [-0.9256, -1.1811, -1.2148],
        [-0.9092, -1.1214, -1.3045],
        [-0.9406, -1.1564, -1.2208],
        [-0.9285, -1.1842, -1.2077]])


In [None]:
import tqdm

# NOTE(review): only 3 epochs are run; the predictions printed in the next cell are still
# mostly wrong, so presumably many more epochs are needed to fit even these two sentences.
for epoch in tqdm.tqdm(range(3)):
    for sentence, tags in training_data:

        # Gradients accumulate across backward() calls, so clear them before every sentence.
        model.zero_grad()

        # Encode words and tags as index tensors.
        sentence_in = prepare_sequence(sentence, word_to_ix)
        targets = prepare_sequence(tags, tag_to_ix)

        # Forward pass: per-word log-softmax scores over the tags.
        tag_scores = model(sentence_in)

        # Negative log-likelihood of the gold tags, backpropagation, one SGD step.
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()


100%|██████████| 3/3 [00:00<00:00, 181.41it/s]


In [None]:
# Score the first training sentence again, now with the updated parameters.
with torch.no_grad():
    inputs = prepare_sequence(training_data[0][0], word_to_ix)
    tag_scores = model(inputs)
    print(tag_scores)
    print(training_data[0][0])
    # For every word take the tag with the highest score and map the index back to its name.
    print([tag_to_ix_rev[idx] for idx in tag_scores.argmax(dim=-1).tolist()])

tensor([[-1.0316, -1.0060, -1.2806],
        [-0.9654, -1.0981, -1.2529],
        [-0.9483, -1.0474, -1.3404],
        [-0.9796, -1.0764, -1.2599],
        [-0.9711, -1.0964, -1.2473]])
['The', 'dog', 'ate', 'the', 'apple']
['NN', 'DET', 'DET', 'DET', 'DET']


## Embeddings

### SVD

In [None]:
from sklearn.datasets import fetch_20newsgroups
# Load both the train and test parts of 20 Newsgroups, restricted to two categories.
data = fetch_20newsgroups(subset='all', categories=['comp.graphics', 'sci.med'])

In [None]:
from sklearn.model_selection import train_test_split
texts = data['data']
# Keep a reproducible 20% sample of the texts; the remaining part is discarded.
texts_train, _ = train_test_split(
    texts, train_size=0.2, random_state=10
)

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
# TF-IDF with the default settings; W has one row per text and one column per vocabulary word.
tf = TfidfVectorizer()
W = tf.fit_transform(texts_train)

In [None]:
# Dense view of the sparse TF-IDF matrix (texts x words). `toarray()` is used instead of
# the `.A` shortcut, which is deprecated and no longer available in recent SciPy releases.
W.toarray()

array([[0.03583734, 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.045274  , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ]])

Every word is a column of $W$, i.e. a vector over the 392 texts, so we can get word vectors of size $\le 392$

In [None]:
# (number of texts, vocabulary size)
W.shape

(392, 14674)

Words:

In [None]:
# Vocabulary words; position i corresponds to column i of W.
words_list = tf.get_feature_names_out()
words_list

array(['00', '000', '0000', ..., 'zyda', 'zyeh', 'zyxel'], dtype=object)

In [None]:
# Show one word together with its TF-IDF column (one value per text). `toarray()` is used
# instead of the `.A` shortcut, which is deprecated and no longer available in recent SciPy.
ind = 1
words_list[ind], W[:, ind].toarray().reshape(-1)

('000',
 array([0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.      

Now apply SVD with rank $k < 392$, i.e. less than every dimension of the matrix

In [None]:
from scipy.sparse.linalg import svds
import numpy as np

def apply_svd(W, k):
  '''
  Build k-dimensional word vectors from a truncated SVD of W.

  W - matrix texts x words
  k - the rank of the SVD, must be less than any dimension of W

  Returns an array of shape (number of words, k): the right singular vectors
  scaled by their singular values, with components ordered from the largest
  singular value to the smallest.
  '''
  left, singular_values, right_t = svds(W, k)

  # svds does not promise any particular ordering of the singular values,
  # so sort all three factors by descending singular value explicitly.
  order = np.argsort(singular_values)[::-1]
  left, singular_values, right_t = left[:, order], singular_values[order], right_t[order]

  # Sanity checks on the shapes of the factors.
  n_texts, n_words = W.shape[0], W.shape[1]
  assert singular_values.shape == (k,)
  assert right_t.shape == (k, n_words)
  assert left.shape == (n_texts, k)

  # Row i of the result is the vector of word i: (diag(sigma) @ Vt)^T.
  return (np.diag(singular_values) @ right_t).T

In [None]:
# 100-dimensional word vectors, one row per vocabulary word.
svd_100 = apply_svd(W, 100)

In [None]:
svd_100.shape

(14674, 100)

Dictionary:

In [None]:
def create_dictionary(words_list, vv, output_file=None):
  '''
  Map every word to its vector.

  words_list  - iterable of words
  vv          - iterable of vectors, aligned with words_list
  output_file - optional path; when given, the dictionary is also stored there with np.save

  Returns the dictionary word -> vector. If a word occurs more than once,
  its last vector wins.
  '''
  dictionary = dict(zip(words_list, vv))
  if output_file is None:
    return dictionary
  # np.save stores the dict as a pickled object array.
  np.save(output_file, dictionary)
  return dictionary

In [None]:
# Word -> 100-dimensional SVD vector; nothing is written to disk (no output_file).
dictionary = create_dictionary(words_list, svd_100)

In [None]:
dictionary['pain']

array([ 0.08162068,  0.01457324,  0.0573702 ,  0.01092644,  0.01441518,
        0.0581395 , -0.0361221 , -0.12796821, -0.05787762, -0.00717716,
        0.03292557,  0.03244072,  0.03767801,  0.03763028, -0.05759102,
        0.07130174, -0.03421932, -0.00374897,  0.04523664, -0.00426982,
        0.02439249,  0.06825938, -0.04353595, -0.00872732, -0.0056856 ,
       -0.01805626,  0.02315732,  0.02750838,  0.0445767 ,  0.00490938,
        0.01626406, -0.01698112,  0.01522541, -0.00039996,  0.02775254,
        0.01855605, -0.03545648, -0.02548007,  0.02332959,  0.01138778,
        0.04560373,  0.02590587,  0.00739913,  0.02607714, -0.03106278,
       -0.02118802, -0.02831239, -0.00247472,  0.02581147, -0.00917712,
       -0.01216964,  0.00822348, -0.1169213 , -0.00164302,  0.06081853,
       -0.012541  ,  0.05355983,  0.10852498, -0.02552695,  0.12563968,
        0.08388369,  0.00283484,  0.10492765, -0.01099165,  0.0984122 ,
       -0.07249084,  0.02511119,  0.07486388,  0.02599941, -0.01

If we want a dimension $d$ smaller than $k = 100$, we can take the first $d$ components (they correspond to the largest singular values)

In [None]:
# Keep only the first d components of the vector.
d = 8
dictionary['pain'][:d]

array([ 0.08162068,  0.01457324,  0.0573702 ,  0.01092644,  0.01441518,
        0.0581395 , -0.0361221 , -0.12796821])

### Word2Vec

In [None]:
# Whitespace tokenisation plus lower-casing of all texts (not only texts_train).
# Punctuation stays attached to the tokens, e.g. 'from:' and '(steve' in the output.
texts_split = [[w.lower() for w in text.split()] for text in texts]
texts_split[0][:10]

['from:',
 'dyer@spdcc.com',
 '(steve',
 'dyer)',
 'subject:',
 're:',
 'analgesics',
 'with',
 'diuretics',
 'organization:']

In [None]:
from gensim.models import Word2Vec

dimension = 100

# sg=0 selects CBOW; min_count=1 keeps every token, including those seen only once.
# NOTE(review): this rebinds `model`, which earlier held the LSTMTagger instance.
# NOTE(review): no seed is passed, so the vectors and neighbours below presumably vary between runs.
model = Word2Vec(sentences=texts_split, vector_size=dimension, min_count=1, sg=0)

In [None]:
model.wv['pain']

array([-0.10188567,  0.3168349 , -0.03407288,  0.02911603, -0.1944051 ,
       -0.816469  ,  0.25281304,  1.0152956 , -0.65765095, -0.43179172,
       -0.12716492, -0.81911385,  0.3574901 ,  0.12279672,  0.24130367,
       -0.3201511 ,  0.18263566, -0.09239068, -0.10735   , -0.919488  ,
        0.65994495,  0.2521395 ,  0.5827362 ,  0.07564346,  0.13465245,
        0.09422866, -0.21135601, -0.12360355, -0.48472223, -0.04254604,
        0.24506234, -0.0665401 ,  0.8892004 , -0.7764519 , -0.24485995,
        1.0993984 ,  0.00881823, -0.31726396, -0.5698758 , -0.1666339 ,
        0.02129736, -0.2473668 , -0.5542225 ,  0.17616703,  0.57803196,
       -0.21031965, -0.621007  , -0.38039428,  0.3606994 ,  0.16589774,
        0.17252785, -0.36901903, -0.20050414, -0.5930792 , -0.2109072 ,
        0.11169235,  0.4547525 , -0.2709127 , -0.09842043,  0.26534796,
        0.37737733, -0.07746303,  0.13796492, -0.02852457, -0.5657466 ,
        0.49047858,  0.38561183,  0.8171999 , -0.6177066 ,  0.61

In [None]:
# Nearest neighbours of 'computer' in the Word2Vec space, as (word, similarity) pairs.
model.wv.most_similar('computer')

[('national', 0.9825732707977295),
 ('western', 0.9788332581520081),
 ('science,', 0.9780481457710266),
 ('institute', 0.9777011275291443),
 ('southern', 0.9772838950157166),
 ('pittsburgh', 0.9762675762176514),
 ('mathematics', 0.974141001701355),
 ('oregon', 0.973869264125824),
 ('state', 0.973533034324646),
 ('school', 0.9728400707244873)]

In [None]:
# Analogy-style query: words close to 'pain' + 'head' and far from 'hand'.
model.wv.most_similar(positive=['pain', 'head'], negative=['hand'])

[('quite', 0.9816800951957703),
 ('first', 0.977030336856842),
 ('getting', 0.976671040058136),
 ('while', 0.9752770662307739),
 ('made', 0.9749552607536316),
 ('medication', 0.9745034575462341),
 ('book.', 0.9740958213806152),
 ('study', 0.9695881009101868),
 ('doing', 0.9694724678993225),
 ('after', 0.9689552187919617)]

In [None]:
model.wv.most_similar(positive=['man'], negative=['woman'])

[('$699', 0.6564669609069824),
 ('patch8', 0.5252602100372314),
 ('macwarehouse.', 0.5206255912780762),
 ('annoyance', 0.5169080495834351),
 ('online?', 0.5078411102294922),
 ('draw.', 0.5008156299591064),
 ('oklahoma(tulsa)', 0.4982879161834717),
 ('>utilities', 0.4879986643791199),
 ('101h.', 0.4840180277824402),
 ('pbm', 0.4821309745311737)]

In [None]:
model.wv.most_similar(positive=['man', 'doctor'], negative=['woman'])

[('pain.', 0.9759824275970459),
 ('implemented', 0.9738233089447021),
 ('treat', 0.9697330594062805),
 ('fat.', 0.9691821336746216),
 ('horrible', 0.9682165384292603),
 ('indicate', 0.968027651309967),
 ('informed', 0.967625617980957),
 ('net', 0.963956892490387),
 ('problems.', 0.9631531238555908),
 ('insurance,', 0.9630241990089417)]

In [None]:
# Out-of-vocabulary lookup: Word2Vec has no vector for a token it never saw,
# so this raises KeyError (see the output below).
model.wv[';jhdrgksrjhnlgkjdhgljkgdjf']

KeyError: "Key ';jhdrgksrjhnlgkjdhgljkgdjf' not present"

## FastText

In [None]:
!pip install fasttext

Collecting fasttext
  Downloading fasttext-0.9.3.tar.gz (73 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/73.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.4/73.4 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting pybind11>=2.2 (from fasttext)
  Using cached pybind11-2.13.6-py3-none-any.whl.metadata (9.5 kB)
Using cached pybind11-2.13.6-py3-none-any.whl (243 kB)
Building wheels for collected packages: fasttext
  Building wheel for fasttext (pyproject.toml) ... [?25l[?25hdone
  Created wheel for fasttext: filename=fasttext-0.9.3-cp311-cp311-linux_x86_64.whl size=4313470 sha256=a06cafd2e8966d8da01fd1320c32b1b4a3d2d8d0e46c434c16f7c9641c846bb9
  Stored in directory: /root/.cache/pip/wheels/65/4f/35/5057db0249224e9ab55a51

In [None]:
import fasttext.util
# Fetch the pretrained English model; if_exists='ignore' skips the download when the file
# is already present. The log below shows cc.en.300.bin.gz being downloaded.
fasttext.util.download_model('en', if_exists='ignore')  # English
ft = fasttext.load_model('cc.en.300.bin')

Downloading https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.en.300.bin.gz



In [None]:
ft['cat']

array([ 0.08105576, -0.02083234, -0.03326922,  0.28555283,  0.13959414,
       -0.1977245 ,  0.10128298,  0.01085356, -0.103824  ,  0.04313416,
       -0.14833796, -0.16765352, -0.15447043, -0.14154345,  0.12743813,
        0.2279076 ,  0.07685639, -0.13873424, -0.20190817,  0.01528534,
       -0.06999817,  0.11306947,  0.01669297,  0.11389008,  0.02094817,
       -0.31620952,  0.09814467, -0.1449248 ,  0.09949644,  0.2211973 ,
        0.02225026,  0.06751259, -0.06465218,  0.11267239, -0.0256991 ,
       -0.04765478,  0.03917777,  0.00168321, -0.11691307, -0.27667975,
       -0.06021226,  0.11350961, -0.11300616,  0.08379158, -0.21970375,
        0.06771149,  0.0296645 , -0.05783203, -0.12882547,  0.09360313,
       -0.0628323 , -0.08581617,  0.17381558, -0.10044617, -0.28967732,
       -0.01837742,  0.01613754, -0.0155128 , -0.11910667,  0.20571907,
        0.2338278 ,  0.17166924,  0.07774843,  0.05795193, -0.05462614,
       -0.05604232,  0.07913449,  0.32939437, -0.21045874, -0.11

In [None]:
ft.get_nearest_neighbors('cat')

[(0.8350069522857666, 'cats'),
 (0.8233457207679749, 'kitty'),
 (0.8083016276359558, 'kitten'),
 (0.7533658742904663, 'feline'),
 (0.7111546397209167, 'moggie'),
 (0.7107509970664978, 'cat.It'),
 (0.7078612446784973, 'dog'),
 (0.7065863609313965, 'cat.The'),
 (0.7062479257583618, 'cat.I'),
 (0.7057439088821411, 'moggy')]

In [None]:
ft.get_nearest_neighbors('doctor')

[(0.7759157419204712, 'physician'),
 (0.7168029546737671, 'doctor.He'),
 (0.7070956826210022, 'doctors'),
 (0.7047469019889832, 'gynecologist'),
 (0.7046633362770081, 'docto'),
 (0.6922913789749146, 'pediatrician'),
 (0.6802521347999573, 'docotor'),
 (0.6785653233528137, 'doctor.I'),
 (0.6722302436828613, 'doctor.So'),
 (0.6719358563423157, 'doctor.')]

In [None]:
ft.get_nearest_neighbors('sightseeing')

[(0.8238611817359924, 'sight-seeing'),
 (0.807179868221283, 'site-seeing'),
 (0.7274408936500549, 'sighseeing'),
 (0.6778866648674011, 'Sightseeing'),
 (0.6742253303527832, 'siteseeing'),
 (0.6740902662277222, 'sightsee'),
 (0.6500001549720764, 'touristing'),
 (0.6408713459968567, 'sightseing'),
 (0.6392289400100708, 'sight-see'),
 (0.6367641687393188, 'excursions')]

In [None]:
# Deliberately misspelled query: the neighbours returned have low similarity and look like noise.
ft.get_nearest_neighbors('soghtseeing')

[(0.43084660172462463, 'citrix-xendesktop-and-xenserver'),
 (0.3963817059993744, 'citrix-desktop-virtualization'),
 (0.3946278989315033,
  'crescendosexibloguerobateyabsorbersexiindesignabledinerolatifundiosexibrezarcularsutesexirapoplinbrezarcorrentosoVd.lazadareflejoreglafeministabrezarchuzasexiouttiqueblogueroin'),
 (0.3893582820892334, 'builders-building-contractors'),
 (0.38910073041915894,
  'deblogueroreflejoantecedentesexitlacuachebateysuteindesignableabsorbersexilatifundiosexibrezarsutemultiétnicosexiplinrapobrezarcorrentosoVd.lazadafisiochillidomabrezarsico-chuzaoutcolodrablogueroin'),
 (0.3837328851222992,
  'DEky4M0BSpUOTPnSpkuL5I0GTSnRI4jMepcaFAoxIoFnX5kmJQk1aYvr2odGBAAIfkECQoABAAsCQAAABAAEgAACGcAARAYSLCgQQEABBokkFAhAQEQHQ4EMKCiQogRCVKsOOAiRocbLQ7EmJEhR4cfEWoUOTFhRIUNE44kGZOjSIQfG9rsyDCnzp0AaMYMyfNjS6JFZWpEKlDiUqALJ0KNatKmU4NDBwYEACH5BAkKAAQALAkAAAAQABIAAAhpAAEQGEiQIICDBAUgLEgAwICHAgkImBhxoMOHAyJOpGgQY8aBGxV2hJgwZMWLFTcCUIjwoEuLBym69PgxJMuDNAUqVDkz50qZLi'),
 (0.37830841541

In [None]:
# Unlike the Word2Vec lookup above, FastText returns a vector even for this made-up token
# (presumably composed from its character n-grams).
ft[';jhdrgksrjhnlgkjdhgljkgdjf']

array([ 0.0096625 ,  0.00011715,  0.00907286,  0.02443647, -0.01559941,
       -0.00864334, -0.00735149,  0.00916221, -0.00429722,  0.00417206,
        0.02430423, -0.00337047,  0.01049395, -0.01674665, -0.02086007,
        0.00355157,  0.00730138, -0.02736767,  0.00813878,  0.02119258,
       -0.01457397, -0.01296001,  0.00260688,  0.00192531,  0.02882681,
        0.01313066, -0.03988799,  0.00451657,  0.00338194,  0.04865512,
        0.02146391,  0.00574875,  0.00133127,  0.01997279,  0.00495417,
       -0.00906077,  0.00552217,  0.00913218, -0.00323922,  0.01522139,
       -0.00251797,  0.0006057 , -0.00974919,  0.02404195,  0.01464827,
        0.01850036,  0.00364946, -0.00379995,  0.00063753,  0.00513693,
        0.00370044, -0.0012614 ,  0.00170929, -0.01435363,  0.00758927,
        0.03057508,  0.00100128,  0.00397344, -0.00061249,  0.01617835,
       -0.0043611 ,  0.01284677,  0.00573666,  0.01671126, -0.00338241,
        0.00163947,  0.02266678, -0.01024497, -0.00991273,  0.01

In [None]:
ft.get_nearest_neighbors(';jhdrgksrjhnlgkjdhgljkgdjf')

[(0.6087337732315063,
  'QQJCgAEACwJAAAAEAASAAAIaQABEBhIkCCAgwQFICxIAMCAhwIJCJgYcaDDhwMiTqRoEGPGgRsVdoSYMGTFixU3AlCI8KBLiwcpuvT4MSTLgzQFKlQ5M'),
 (0.6080808043479919,
  'DEky4M0BSpUOTPnSpkuL5I0GTSnRI4jMepcaFAoxIoFnX5kmJQk1aYvr2odGBAAIfkECQoABAAsCQAAABAAEgAACGcAARAYSLCgQQEABBokkFAhAQEQHQ4EMKCiQogRCVKsOOAiRocbLQ7EmJEhR4cfEWoUOTFhRIUNE44kGZOjSIQfG9rsyDCnzp0AaMYMyfNjS6JFZWpEKlDiUqALJ0KNatKmU4NDBwYEACH5BAUKAAQALAkAAAAQABIAAAhpAAEQGEiQIICDBAUgLEgAwICHAgkImBhxoMOHAyJOpGgQY8aBGxV2hJgwZMWLFTcCUIjwoEuLBym69PgxJMuDNAUqVDkz50qZLi'),
 (0.6071555018424988,
  'QQJCgAEACwJAAAAEAASAAAIZwABEBhIsKBBAQAEGiSQUCEBARAdDgQwoKJCiBEJUqw44CJGhxstDsSYkSFHhx8RahQ5MWFEhQ0TjiQZk6NIhB8b2uzIMKfOnQBoxgzJ82NLokVlakQqUOJSoAsnQo1q0qZTg0MHBgQAIfkECQoABAAsCQAAABAAEgAACGkAARAYSJAggIMEBSAsSADAgIcCCQiYGHGgw4cDIk6kaBBjxoEbFXaEmDBkxYsVNwJQiPCgS4sHKbr0'),
 (0.6069748997688293,
  'DEky4M0BSpUOTPnSpkuL5I0GTSnRI4jMepcaFAoxIoFnX5kmJQk1aYvr2odGBAAIfkECQoABAAsCQAAABAAEgAACGcAARAYSLCgQQEABBokkFAhAQEQHQ4EMKCiQogRCVKsOOAiRocbLQ7EmJEhR4cfE

## Sources

https://pytorch.org/tutorials/beginner/nlp/sequence_models_tutorial.html

