# Usage of various PyTorch APIs

In [1]:
import torch
import torch.nn as nn
from IPython.display import display_markdown

def print_md(s):
    """Render the markdown source string `s` as rich output.

    `raw=True` tells IPython that `s` is already markdown text to be
    displayed directly, rather than an object to be formatted first.
    """
    markdown_source = s
    display_markdown(markdown_source, raw=True)

## Embeddings
[API Docs](https://pytorch.org/docs/stable/generated/torch.nn.Embedding.html)

In [2]:
# nn.Embedding usage, part 1: preparing the keys.
# An embedding is a lookup table: each key (an integer index) has a vector stored against it.
# Hand it a batch of keys and it returns the matching batch of vectors.

# Here the keys are words of a sentence, so words must first be turned into integer indices.
# Vocabulary: 100 English words. A word's position in the list is its index
# ('how' -> 0, 'to' -> 1, ..., 'two' -> 99).
v100 = ['how', 'to', 'her', 'at', 'up', 'see', 'in', 'thing', 'even', 'because', 'or', 
             'what', 'man', 'this', 'for', 'with', 'time', 'now', 'give', 'very', 'take', 'other', 
             'there', 'would', 'first', 'about', 'people', 'think', 'find', 'so', 'say', 'as', 
             'many', 'will', 'just', 'he', 'I', 'well', 'our', 'tell', 'out', 'have', 'can', 'its', 
             'make', 'get', 'if', 'than', 'use', 'that', 'new', 'also', 'from', 'by', 'his', 'year', 
             'do', 'some', 'the', 'no', 'a', 'those', 'she', 'come', 'one', 'their', 'more', 
             'these', 'all', 'go', 'and', 'could', 'him', 'into', 'only', 'who', 'of', 'it', 'your', 
             'not', 'you', 'here', 'when', 'on', 'which', 'then', 'know', 'them', 'my', 'me', 'we', 
             'want', 'they', 'like', 'look', 'day', 'way', 'but', 'be', 'two']

# A 100-word vocabulary cannot cover even common sentences, so every out-of-vocabulary
# word is replaced by a single placeholder token, '<unk>', stored at index 100.
v100.append('<unk>')

# word -> index lookup table
w2i = dict(zip(v100, range(len(v100))))

# Five sentences, all of the same length (4 words), whose word embeddings we want to look up.
sents_lang = [
    'this is my book',
    'those are your books',
    'what is your name',
    'i will be back',
    'go out and about'
]

# Transform each sentence into its list of word indices.
# The lookup is an exact, case-sensitive match: the lowercase 'i' in the fourth sentence
# is not the vocabulary entry 'I', so it falls back to '<unk>' like any unknown word.
sents_indices = [
    [w2i[token] if token in w2i else w2i['<unk>'] for token in sentence.split()]
    for sentence in sents_lang
]

# Show every sentence next to its indexed form.
print_md('#### Sentences represented as array of indices')
for s1, s2 in zip(sents_lang, sents_indices):
    rendered_pair = '*{}* => {}'.format(s1, s2)
    print_md(rendered_pair)

# Convert sents_indices to a tensor; because all sentences have the same number of words
# the nested list is rectangular and becomes a 2-D tensor (sentences x words).
sents_tensor = torch.tensor(sents_indices)
print_md('#### Sentences represented as a tensor')
print(sents_tensor)
shape_report = 'Shape = {}'.format(sents_tensor.shape)
print(shape_report)



# nn.Embedding usage, part 2: looking up vectors.

# nn.Embedding fills its weight table with random values, so fix the RNG seed first;
# otherwise the vectors printed below change on every fresh run of the notebook.
torch.manual_seed(0)

# Create an embedding table holding one 10-dimensional vector per vocabulary entry
# (len(v100) rows, which includes the '<unk>' entry).
embedding_1 = nn.Embedding(num_embeddings=len(v100), embedding_dim=10)

# What's the embedding for the word 'who'?
# A 1-element index tensor goes in, so a (1, 10) tensor comes out.
embed = embedding_1(torch.tensor([ w2i['who']]))
print_md("#### Embedding of 'who'")
print("{}\nShape = {}".format(embed, embed.shape))

# What's the embedding of the 2nd sentence?
# sents_tensor[1] is a 1-D tensor of word indices; the result has one row per word.
# Words that share an index (e.g. two '<unk>' words) get identical rows.
embed = embedding_1(sents_tensor[1])
print_md("#### Embedding of sentence {}".format(sents_lang[1]))
print("{}\nShape = {}".format(embed, embed.shape))

# Embeddings for all sentences at once: the lookup keeps the shape of the index tensor
# and appends the embedding dimension as the last axis.
embed = embedding_1(sents_tensor)
print_md("#### Embeddings of all 5 sentences")
print("{}\nShape = {}".format(embed, embed.shape))


# a little more sophisticated embedding lookup

#### Sentences represented as array of indices

*this is my book* => [13, 100, 88, 100]

*those are your books* => [61, 100, 78, 100]

*what is your name* => [11, 100, 78, 100]

*i will be back* => [100, 33, 98, 100]

*go out and about* => [69, 40, 70, 25]

#### Sentences represented as a tensor

tensor([[ 13, 100,  88, 100],
        [ 61, 100,  78, 100],
        [ 11, 100,  78, 100],
        [100,  33,  98, 100],
        [ 69,  40,  70,  25]])
Shape = torch.Size([5, 4])


#### Embedding of 'who'

tensor([[-0.3614,  0.4452,  0.5306, -1.2300,  0.5073,  0.5823, -0.1579, -0.8442,
         -0.6339,  0.5294]], grad_fn=<EmbeddingBackward>)
Shape = torch.Size([1, 10])


#### Embedding of sentence those are your books

tensor([[ 0.3372, -2.1102, -0.2155, -1.3066,  1.3178,  0.0671,  0.8783, -0.1200,
         -0.9483, -0.5488],
        [-0.1346,  1.5501, -0.1412,  0.8763,  0.3140, -2.2132, -2.0929, -0.6477,
          0.4682, -0.5573],
        [-0.1762, -1.5421,  0.4677, -0.5787,  1.5979,  1.4621,  0.9051,  0.2137,
          0.9754,  1.1032],
        [-0.1346,  1.5501, -0.1412,  0.8763,  0.3140, -2.2132, -2.0929, -0.6477,
          0.4682, -0.5573]], grad_fn=<EmbeddingBackward>)
Shape = torch.Size([4, 10])


#### Embeddings of all 5 sentences

tensor([[[-0.5163, -0.2413, -1.3965,  0.3270,  0.2376,  1.1178, -0.1212,
          -1.2906, -0.7408,  1.6643],
         [-0.1346,  1.5501, -0.1412,  0.8763,  0.3140, -2.2132, -2.0929,
          -0.6477,  0.4682, -0.5573],
         [ 0.5258, -1.7931, -0.9446,  0.9506, -0.7688, -0.3358, -0.5055,
           0.8074,  0.1054,  1.6304],
         [-0.1346,  1.5501, -0.1412,  0.8763,  0.3140, -2.2132, -2.0929,
          -0.6477,  0.4682, -0.5573]],

        [[ 0.3372, -2.1102, -0.2155, -1.3066,  1.3178,  0.0671,  0.8783,
          -0.1200, -0.9483, -0.5488],
         [-0.1346,  1.5501, -0.1412,  0.8763,  0.3140, -2.2132, -2.0929,
          -0.6477,  0.4682, -0.5573],
         [-0.1762, -1.5421,  0.4677, -0.5787,  1.5979,  1.4621,  0.9051,
           0.2137,  0.9754,  1.1032],
         [-0.1346,  1.5501, -0.1412,  0.8763,  0.3140, -2.2132, -2.0929,
          -0.6477,  0.4682, -0.5573]],

        [[-2.2525, -0.9693, -1.2082, -0.6579, -1.0667, -0.2031,  1.9947,
           0.9070,  0.8579,  0.1010