# Usage of various PyTorch APIs

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from ipython_utils import *

## Setup

In [2]:
# Toy data shared by the API demonstrations in this notebook.
# A vocabulary of 100 English words, ten per row; a word's index is its position in the list
# ('how' -> 0, 'to' -> 1, ..., 'two' -> 99).
v100 = [
    'how', 'to', 'her', 'at', 'up', 'see', 'in', 'thing', 'even', 'because',                # 0-9
    'or', 'what', 'man', 'this', 'for', 'with', 'time', 'now', 'give', 'very',              # 10-19
    'take', 'other', 'there', 'would', 'first', 'about', 'people', 'think', 'find', 'so',   # 20-29
    'say', 'as', 'many', 'will', 'just', 'he', 'I', 'well', 'our', 'tell',                  # 30-39
    'out', 'have', 'can', 'its', 'make', 'get', 'if', 'than', 'use', 'that',                # 40-49
    'new', 'also', 'from', 'by', 'his', 'year', 'do', 'some', 'the', 'no',                  # 50-59
    'a', 'those', 'she', 'come', 'one', 'their', 'more', 'these', 'all', 'go',              # 60-69
    'and', 'could', 'him', 'into', 'only', 'who', 'of', 'it', 'your', 'not',                # 70-79
    'you', 'here', 'when', 'on', 'which', 'then', 'know', 'them', 'my', 'me',               # 80-89
    'we', 'want', 'they', 'like', 'look', 'day', 'way', 'but', 'be', 'two',                 # 90-99
]

# A vocabulary this small cannot cover even everyday sentences, so every out-of-vocabulary
# word is replaced by the placeholder token '<unk>', which is stored at index 100.
v100 += ['<unk>']

# word -> index lookup table
w2i = dict(zip(v100, range(len(v100))))

# A handful of four-word sentences. They all have the same length, so they stack into one
# rectangular tensor; the embeddings of their words are looked up further down.
sents_lang = [
    'this is my book',
    'those are your books',
    'what is your name',
    'i will be back',
    'go out and about',
]

# Transform every sentence into a list of vocabulary indices.
# Words missing from the vocabulary fall back to the index of '<unk>'.
sents_indices = [
    [w2i[word] if word in w2i else w2i['<unk>'] for word in sent.split()]
    for sent in sents_lang
]

# show each sentence next to its index representation
print_h4('Sentences represented as array of indices')
print_table(['Sentence', 'Word Indices'], zip(sents_lang, sents_indices))

# pack sents_indices into a single tensor: one row per sentence, one column per word
sents_tensor = torch.tensor(sents_indices)
print_h4('Sentences represented as a tensor')
print(sents_tensor)
print('Shape = {}'.format(sents_tensor.size()))

#### Sentences represented as array of indices

|Sentence | Word Indices|
|--|--|
| this is my book | [13, 100, 88, 100] |
| those are your books | [61, 100, 78, 100] |
| what is your name | [11, 100, 78, 100] |
| i will be back | [100, 33, 98, 100] |
| go out and about | [69, 40, 70, 25] |

#### Sentences represented as a tensor

tensor([[ 13, 100,  88, 100],
        [ 61, 100,  78, 100],
        [ 11, 100,  78, 100],
        [100,  33,  98, 100],
        [ 69,  40,  70,  25]])
Shape = torch.Size([5, 4])


## Embeddings
[API Docs](https://pytorch.org/docs/stable/generated/torch.nn.Embedding.html)

In [3]:
# nn.Embedding in a nutshell:
# it is a lookup table that stores one vector per key (typically an integer index).
# Hand it a bunch of keys and it returns the matching bunch of vectors.

# one 10-dimensional vector for every entry of the vocabulary
embedding_1 = nn.Embedding(len(v100), 10)

In [4]:
# The lookup table is stored in `embedding_1.weight`; nn.Embedding initializes it with values drawn from N(0, 1)
print_md("#### Embedding weights")
print("Shape of weights = {}".format(embedding_1.weight.size()))

# look up a single word: 'who' (wrapped in a list, so the result has one row)
embed = embedding_1(torch.tensor([w2i['who']]))
print_h4("Embedding of '*who*'")
print("{}\nShape = {}".format(embed, embed.size()))

# look up a whole sentence: the 2nd one (index 1) gives one row per word
embed = embedding_1(sents_tensor[1])
print_h4("Embedding of the sentence '*{}*'".format(sents_lang[1]))
print("{}\nShape = {}".format(embed, embed.size()))

# look up every sentence at once: the index tensor's shape is kept and an embedding axis is appended
embed = embedding_1(sents_tensor)
print_h4("Embeddings of all 5 sentences")
print("{}\nShape = {}".format(embed, embed.size()))

#### Embedding weights

Shape of weights = torch.Size([101, 10])


#### Embedding of '*who*'

tensor([[ 8.9702e-01, -1.5678e+00, -1.8237e+00,  1.7238e-04, -9.0997e-03,
          4.1219e-01, -3.5223e-02,  1.3769e-01,  4.5530e-01, -4.6319e-01]],
       grad_fn=<EmbeddingBackward>)
Shape = torch.Size([1, 10])


#### Embedding of the sentence '*those are your books*'

tensor([[-0.4524,  1.3342,  0.1735, -1.2838, -0.7156, -0.8297,  0.6391, -1.2374,
         -0.1712,  1.6919],
        [-0.5427, -1.0992, -1.4240, -0.2789, -0.5119,  0.3951,  0.6345, -0.6301,
          0.4329, -1.2099],
        [-0.6873, -1.6254, -0.0856, -0.5791, -0.7229, -0.0749,  0.2925,  0.2216,
         -0.9191,  0.8818],
        [-0.5427, -1.0992, -1.4240, -0.2789, -0.5119,  0.3951,  0.6345, -0.6301,
          0.4329, -1.2099]], grad_fn=<EmbeddingBackward>)
Shape = torch.Size([4, 10])


#### Embeddings of all 5 sentences

tensor([[[ 1.0969, -1.3230, -0.8039, -0.4816, -1.6083,  1.2754,  0.3380,
           0.8013, -1.1279, -0.6775],
         [-0.5427, -1.0992, -1.4240, -0.2789, -0.5119,  0.3951,  0.6345,
          -0.6301,  0.4329, -1.2099],
         [-0.1417,  0.0158,  1.6455, -0.3149,  0.1524,  0.0045,  0.1179,
          -1.4465, -1.6636,  0.3065],
         [-0.5427, -1.0992, -1.4240, -0.2789, -0.5119,  0.3951,  0.6345,
          -0.6301,  0.4329, -1.2099]],

        [[-0.4524,  1.3342,  0.1735, -1.2838, -0.7156, -0.8297,  0.6391,
          -1.2374, -0.1712,  1.6919],
         [-0.5427, -1.0992, -1.4240, -0.2789, -0.5119,  0.3951,  0.6345,
          -0.6301,  0.4329, -1.2099],
         [-0.6873, -1.6254, -0.0856, -0.5791, -0.7229, -0.0749,  0.2925,
           0.2216, -0.9191,  0.8818],
         [-0.5427, -1.0992, -1.4240, -0.2789, -0.5119,  0.3951,  0.6345,
          -0.6301,  0.4329, -1.2099]],

        [[ 0.0220,  1.3754, -0.1197,  0.2846, -0.1834, -0.7634,  0.5949,
           0.5708,  0.2044,  1.1528

In [5]:
# A slightly more sophisticated embedding layer
# Suppose the embeddings we look up should be passed through a linear transformation followed by a ReLU. This can be done by implementing a custom module wrapping nn.Embedding
# This module applies that transformation to word embeddings of size 'in_embedding_dim'; the output is embeddings of size 'out_embedding_dim'
class CustomEmbedding1(nn.Module):
    """Embedding lookup followed by a linear projection and a ReLU.

    Args:
        num_embeddings: number of rows in the lookup table (vocabulary size).
        in_embedding_dim: size of the vectors stored in the lookup table.
        out_embedding_dim: size of the vectors produced by the module.
    """

    def __init__(self, num_embeddings, in_embedding_dim, out_embedding_dim):
        super().__init__()
        # lookup table: index -> vector of size in_embedding_dim
        self.embedding = nn.Embedding(num_embeddings=num_embeddings, embedding_dim=in_embedding_dim)
        # projection: in_embedding_dim -> out_embedding_dim
        self.linear = nn.Linear(in_embedding_dim, out_embedding_dim)

    def forward(self, input):
        """Return the projected, rectified embeddings for a tensor of indices.

        The output keeps the shape of `input` and appends one axis of size
        `out_embedding_dim`.
        """
        looked_up = self.embedding(input)
        projected = self.linear(looked_up)
        # in-place ReLU: `projected` is a fresh tensor owned by this call
        return F.relu_(projected)

# one table row per vocabulary entry; 10-dimensional lookups are mapped to 5-dimensional outputs
cust_embedding_1 = CustomEmbedding1(
    num_embeddings=len(v100),
    in_embedding_dim=10,
    out_embedding_dim=5,
)

In [6]:
# look up a single word with the custom layer: 'who'
cust_embed = cust_embedding_1(torch.tensor([w2i['who']]))
print_h4("Embedding of '*who*'")
print("{}\nShape = {}".format(cust_embed, cust_embed.size()))

# look up a whole sentence: the 2nd one (index 1)
cust_embed = cust_embedding_1(sents_tensor[1])
print_h4("Embedding of the sentence: '*{}*'".format(sents_lang[1]))
print("{}\nShape = {}".format(cust_embed, cust_embed.size()))

# look up every sentence at once
cust_embed = cust_embedding_1(sents_tensor)
print_h4("Embeddings of all 5 sentences")
print("{}\nShape = {}".format(cust_embed, cust_embed.size()))

#### Embedding of '*who*'

tensor([[0.6506, 0.1312, 1.5131, 0.3793, 0.0000]], grad_fn=<ReluBackward1>)
Shape = torch.Size([1, 5])


#### Embedding of the sentence: '*those are your books*'

tensor([[0.0000, 0.0000, 0.2898, 0.5577, 0.0000],
        [0.4989, 2.3749, 0.2414, 0.5542, 0.1100],
        [0.0000, 0.0000, 0.7519, 0.1280, 0.0000],
        [0.4989, 2.3749, 0.2414, 0.5542, 0.1100]], grad_fn=<ReluBackward1>)
Shape = torch.Size([4, 5])


#### Embeddings of all 5 sentences

tensor([[[3.0627e-01, 8.6871e-01, 0.0000e+00, 0.0000e+00, 3.1620e-01],
         [4.9887e-01, 2.3749e+00, 2.4142e-01, 5.5416e-01, 1.0999e-01],
         [6.5077e-01, 0.0000e+00, 7.5747e-03, 9.5336e-01, 7.7200e-01],
         [4.9887e-01, 2.3749e+00, 2.4142e-01, 5.5416e-01, 1.0999e-01]],

        [[0.0000e+00, 0.0000e+00, 2.8975e-01, 5.5775e-01, 0.0000e+00],
         [4.9887e-01, 2.3749e+00, 2.4142e-01, 5.5416e-01, 1.0999e-01],
         [0.0000e+00, 0.0000e+00, 7.5188e-01, 1.2798e-01, 0.0000e+00],
         [4.9887e-01, 2.3749e+00, 2.4142e-01, 5.5416e-01, 1.0999e-01]],

        [[1.1777e+00, 1.0145e+00, 0.0000e+00, 1.9592e-02, 5.4143e-04],
         [4.9887e-01, 2.3749e+00, 2.4142e-01, 5.5416e-01, 1.0999e-01],
         [0.0000e+00, 0.0000e+00, 7.5188e-01, 1.2798e-01, 0.0000e+00],
         [4.9887e-01, 2.3749e+00, 2.4142e-01, 5.5416e-01, 1.0999e-01]],

        [[4.9887e-01, 2.3749e+00, 2.4142e-01, 5.5416e-01, 1.0999e-01],
         [0.0000e+00, 1.6228e+00, 6.9153e-01, 0.0000e+00, 5.6879e-01],


## Conv1d
[API Docs](https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html)

In [7]:
"""
This example illustrates usage of nn.Conv1d

Imagine input is a sentence of words, and we lookup embeddings for each word. 
If sentence length is 'l' and embeddings' size is 'n', then each sentence is a (l x n) matrix

To apply a 1-D convolution such that there are 'm' output channels, we will need 'm' kernels/filters.
If 'k' is the kernel size, then each kernel is a (k x n) matrix

We will use the setup from above, where 'sents_tensor' is a tensor of word indices.
"""

# Run the batch of sentences through the embedding layer to get their embedded representation.
# shape of sents_embedded = (s x l x n); where s = batch_size (no. of sentences), l = length of sentence, n = embedding size
sents_embedded = embedding_1(sents_tensor)

batch_size, sent_len, embed_size = sents_embedded.size()

# the number of output channels and the kernel size are hyperparameters; for simplicity we use 4 and 2 respectively
out_channels, kernel_size = 4, 2

# the convolution layer: the embedding dimensions act as its input channels
c1d = nn.Conv1d(embed_size, out_channels, kernel_size)

In [8]:
"""
Naturally, to apply convolution, your input needs to be a sequence. Eg: a sequence of words (= sentence)

nn.Conv1d expects a batch input. And, it applies convolution filters(aka kernels) along the last dimension of input.

To apply convolution on a single sentence, represented as a (l x n) matrix. Here l=sentence length; n=word embedding size
1. Transform it to a 'batched-input' of shape (1 x l x n) -- basically a batch of just 1 sentence
2. We want to apply convolution filters along the length (2nd dimension). But nn.Conv1d assumes the 3rd dimension as the length.
3. So transpose input to shape (1 x n x l), before convolving.

To apply convolution on set of sentences, represented as a (s x l x n) matrix. Here s=no.of sentences; l=sentence length; n=word embedding size
1. We already have a 'batched-input', no changes required
2. We want to apply convolution filters along the length (2nd dimension) of each sentence. But for nn.Conv1d 3rd dimension is the sentence length.
3. Transpose input to shape (s x n x l), before convolving.
"""
# take the 3rd sentence (index 2) out of the embedded batch: an (l x n) matrix
sent_embedded = sents_embedded[2]
print_h4("Convolving single sentence '*{}*'".format(sents_lang[2]))
print('Embedded representation = {}'.format(sent_embedded))
print('Shape = {}\n'.format(sent_embedded.size()))

# nn.Conv1d needs a 3-d 'batched-input';
# prepend a batch axis of size 1 -> (1 x l x n)
sent_embedded = torch.unsqueeze(sent_embedded, 0)
print('Batched-input representation = {}'.format(sent_embedded))
print('Shape = {}\n'.format(sent_embedded.size()))

# swap the last two axes so that the sentence length comes last -> (1 x n x l)
sent_embedded = torch.transpose(sent_embedded, 1, 2)
print('Batched-input transposed = {}'.format(sent_embedded))
print('Shape = {}\n'.format(sent_embedded.size()))

# convolve
# shape of the output tensor: (s x m x l_o)
# where s=batch_size; m=out_channels (aka no. of filters/kernels); l_o=output length, which is a function of: input_length(l), stride (defaults to 1) & kernel_size
cnlv_1 = c1d(sent_embedded)
print('Convolution output = {}'.format(cnlv_1))
print('Shape = {}\n'.format(cnlv_1.size()))


# Now convolve a batch of 2 sentences: the 2nd and the 4th (indices 1 and 3).
# One index list drives both the heading and the selection, so the two always agree.
# An index list is also independent of the batch size, whereas a hard-coded boolean mask
# needs exactly one entry per sentence and breaks as soon as the number of sentences changes.
pair_ids = [1, 3]
print_h4("Convolving 2 sentences '*{}*' and '*{}*'".format(sents_lang[pair_ids[0]], sents_lang[pair_ids[1]]))
# indexing the batch axis with an integer tensor picks those sentences, in the given order -> (2 x l x n)
sents_embedded_2 = sents_embedded[torch.tensor(pair_ids)]
print('Batched-Input = {}'.format(sents_embedded_2))
print('Shape = {}\n'.format(sents_embedded_2.shape))

# transpose so that the sentence length is the last axis -> (2 x n x l)
sents_embedded_2 = sents_embedded_2.transpose(dim0=1, dim1=2)
print('Batched-input transposed = {}'.format(sents_embedded_2))
print('Shape = {}\n'.format(sents_embedded_2.shape))

# convolve
# output tensor's shape: (s x m x l_o)
# Here s=batch_size; m=out_channels (aka no. of filters/kernels); l_o=output length, which is a function of: input_length(l), stride (defaults to 1) & kernel_size
cnlv_2 = c1d(sents_embedded_2)
print('Convolution output = {}'.format(cnlv_2))
print('Shape = {}\n'.format(cnlv_2.shape))

#### Convolving single sentence '*what is your name*'

Embedded representation = tensor([[ 0.0220,  1.3754, -0.1197,  0.2846, -0.1834, -0.7634,  0.5949,  0.5708,
          0.2044,  1.1528],
        [-0.5427, -1.0992, -1.4240, -0.2789, -0.5119,  0.3951,  0.6345, -0.6301,
          0.4329, -1.2099],
        [-0.6873, -1.6254, -0.0856, -0.5791, -0.7229, -0.0749,  0.2925,  0.2216,
         -0.9191,  0.8818],
        [-0.5427, -1.0992, -1.4240, -0.2789, -0.5119,  0.3951,  0.6345, -0.6301,
          0.4329, -1.2099]], grad_fn=<SelectBackward>)
Shape = torch.Size([4, 10])

Batched-input representation = tensor([[[ 0.0220,  1.3754, -0.1197,  0.2846, -0.1834, -0.7634,  0.5949,
           0.5708,  0.2044,  1.1528],
         [-0.5427, -1.0992, -1.4240, -0.2789, -0.5119,  0.3951,  0.6345,
          -0.6301,  0.4329, -1.2099],
         [-0.6873, -1.6254, -0.0856, -0.5791, -0.7229, -0.0749,  0.2925,
           0.2216, -0.9191,  0.8818],
         [-0.5427, -1.0992, -1.4240, -0.2789, -0.5119,  0.3951,  0.6345,
          -0.6301,  0.4329, -1.2099]]], grad_

#### Convolving 2 sentences '*those are your books*' and '*i will be back*'

Batched-Input = tensor([[[-0.4524,  1.3342,  0.1735, -1.2838, -0.7156, -0.8297,  0.6391,
          -1.2374, -0.1712,  1.6919],
         [-0.5427, -1.0992, -1.4240, -0.2789, -0.5119,  0.3951,  0.6345,
          -0.6301,  0.4329, -1.2099],
         [-0.6873, -1.6254, -0.0856, -0.5791, -0.7229, -0.0749,  0.2925,
           0.2216, -0.9191,  0.8818],
         [-0.5427, -1.0992, -1.4240, -0.2789, -0.5119,  0.3951,  0.6345,
          -0.6301,  0.4329, -1.2099]],

        [[-0.5427, -1.0992, -1.4240, -0.2789, -0.5119,  0.3951,  0.6345,
          -0.6301,  0.4329, -1.2099],
         [-0.7345, -0.5617,  0.7280,  0.7789,  0.7054,  0.4866,  1.1243,
          -0.3388,  0.3729, -1.3791],
         [-0.0030, -2.3620, -1.0123, -0.9666, -1.6393,  0.5434, -0.0102,
           2.2300, -1.0741, -0.2395],
         [-0.5427, -1.0992, -1.4240, -0.2789, -0.5119,  0.3951,  0.6345,
          -0.6301,  0.4329, -1.2099]]], grad_fn=<IndexBackward>)
Shape = torch.Size([2, 4, 10])

Batched-input transposed = tensor([