<a href="https://colab.research.google.com/github/bdeignan/nlp-with-pytorch/blob/develop/notebooks/chapter_1_exercises.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Chapter 1

Representing language: BoW, tokenization, vector representations

In [1]:
import torch

In [2]:
def describe(x):
    """Print a short report on tensor ``x``: type string, shape, and values.

    Args:
        x: object exposing ``type()`` and ``shape`` (a ``torch.Tensor``
            everywhere this notebook calls it).
    """
    kind = x.type()
    print("Type: {}".format(kind))
    dims = x.shape
    print("Shape/size: {}".format(dims))
    # The newline after "Values:" lets multi-row tensors start on their own line.
    print("Values: \n{}".format(x))

In [9]:
# A one-element list of ints gives a 1-D integer tensor with a single entry.
x = torch.tensor([0])

describe(x)

Type: torch.LongTensor
Shape/size: torch.Size([1])
Values: 
tensor([0])


In [10]:
# A bare Python scalar gives a 0-dim tensor: its shape is the empty torch.Size([]).
describe(torch.tensor(0))

Type: torch.LongTensor
Shape/size: torch.Size([])
Values: 
0


In [11]:
# A flat list of three ints gives a 1-D tensor of length 3.
describe(torch.tensor([0, 1, 3]))

Type: torch.LongTensor
Shape/size: torch.Size([3])
Values: 
tensor([0, 1, 3])


In [13]:
# A list of two equal-length lists gives a 2-D tensor: 2 rows, 3 columns.
describe(torch.tensor([[0, 1, 3], [0, 1, 3]]))

Type: torch.LongTensor
Shape/size: torch.Size([2, 3])
Values: 
tensor([[0, 1, 3],
        [0, 1, 3]])


In [14]:
# Both constructors take the desired shape as positional sizes and return float tensors.
describe(torch.rand(2, 3))   # uniform random on [0, 1)
describe(torch.randn(2, 3))  # standard normal (mean 0, std 1)

Type: torch.FloatTensor
Shape/size: torch.Size([2, 3])
Values: 
tensor([[0.5643, 0.7477, 0.9935],
        [0.2316, 0.2510, 0.4650]])
Type: torch.FloatTensor
Shape/size: torch.Size([2, 3])
Values: 
tensor([[-1.6094, -0.9449, -0.1904],
        [ 0.5139,  0.7245,  0.4966]])


In [16]:
# zeros / ones build float tensors of the given shape filled with 0s / 1s.
describe(torch.zeros(2, 3))
x = torch.ones(2, 3)
describe(x)
# The trailing underscore marks an in-place op: fill_ overwrites the values held by x.
x.fill_(5)
describe(x)

Type: torch.FloatTensor
Shape/size: torch.Size([2, 3])
Values: 
tensor([[0., 0., 0.],
        [0., 0., 0.]])
Type: torch.FloatTensor
Shape/size: torch.Size([2, 3])
Values: 
tensor([[1., 1., 1.],
        [1., 1., 1.]])
Type: torch.FloatTensor
Shape/size: torch.Size([2, 3])
Values: 
tensor([[5., 5., 5.],
        [5., 5., 5.]])


In [17]:
# arange(6) yields the integers 0 through 5 as a 1-D tensor.
x = torch.arange(6)
describe(x)

Type: torch.LongTensor
Shape/size: torch.Size([6])
Values: 
tensor([0, 1, 2, 3, 4, 5])


In [18]:
# View the same 6 values as 2 rows of 3 (filled row by row); x is rebound to the 2-D view.
x = x.view(2, 3)
describe(x)

Type: torch.LongTensor
Shape/size: torch.Size([2, 3])
Values: 
tensor([[0, 1, 2],
        [3, 4, 5]])


NOTE:

The `dim` argument below controls which dimension gets summed. The integer passed to `dim` is the dimension you sum along, so the resulting tensor has exactly that dimension collapsed while all other dimensions are still present.

In [19]:
# dim=0 sums down the rows, leaving one total per column (shape [3]).
describe(torch.sum(x, dim=0))

Type: torch.LongTensor
Shape/size: torch.Size([3])
Values: 
tensor([3, 5, 7])


In [20]:
# dim=1 sums across the columns, leaving one total per row (shape [2]).
describe(torch.sum(x, dim=1))

Type: torch.LongTensor
Shape/size: torch.Size([2])
Values: 
tensor([ 3, 12])


In [21]:
# 12 uniform random values in a flat 1-D tensor.
y = torch.rand(12)

describe(y)

Type: torch.FloatTensor
Shape/size: torch.Size([12])
Values: 
tensor([0.6874, 0.0408, 0.5146, 0.3597, 0.7149, 0.8677, 0.0578, 0.8077, 0.1573,
        0.5205, 0.9356, 0.0842])


In [22]:
# Reinterpret the 12 values as 2 matrices of 3 rows x 2 columns.
y = y.view(2, 3, 2)

describe(y)

Type: torch.FloatTensor
Shape/size: torch.Size([2, 3, 2])
Values: 
tensor([[[0.6874, 0.0408],
         [0.5146, 0.3597],
         [0.7149, 0.8677]],

        [[0.0578, 0.8077],
         [0.1573, 0.5205],
         [0.9356, 0.0842]]])


Guess what the resulting tensor dims should be below?

In [23]:
describe(torch.sum(y, dim=0)) # shape (3, 2): elementwise sum of the two 3x2 matrices

Type: torch.FloatTensor
Shape/size: torch.Size([3, 2])
Values: 
tensor([[0.7452, 0.8485],
        [0.6719, 0.8802],
        [1.6505, 0.9519]])


In [24]:
describe(torch.sum(y, dim=1)) # shape (2, 2): within each of the 2 matrices, sum down the 3 rows (one total per column)

Type: torch.FloatTensor
Shape/size: torch.Size([2, 2])
Values: 
tensor([[1.9169, 1.2682],
        [1.1507, 1.4124]])


In [25]:
describe(torch.sum(y, dim=2)) # shape (2, 3): sum the 2 entries within each innermost row

Type: torch.FloatTensor
Shape/size: torch.Size([2, 3])
Values: 
tensor([[0.7282, 0.8743, 1.5826],
        [0.8655, 0.6777, 1.0198]])


In [27]:
# Sanity check: entry [0, 0] of the dim=0 sum is y[0, 0, 0] + y[1, 0, 0].
0.6874 + 0.0578

0.7452

In [28]:
# Sanity check: entry [0, 0] of the dim=2 sum is y[0, 0, 0] + y[0, 0, 1].
0.6874 + 0.0408

0.7282

## Indexing, slicing, joining

In [29]:
# Fresh 2x3 integer tensor (values 0..5) for the indexing examples.
x = torch.arange(6).view(2, 3)
describe(x)

Type: torch.LongTensor
Shape/size: torch.Size([2, 3])
Values: 
tensor([[0, 1, 2],
        [3, 4, 5]])


In [30]:
# Rows before index 1, columns before index 2; slicing on both axes keeps the result 2-D.
x[:1, :2]

tensor([[0, 1]])

In [34]:
# grab 4 and 5
# Slicing on both axes keeps the result 2-D: shape (1, 2).
x[1:, 1:]

tensor([[4, 5]])

Notice the difference between the cell above and the one below:

In [35]:
# An integer index drops that dimension, so the result here is 1-D.
x[1, 1:] # index and slice

tensor([4, 5])

In [37]:
# join: cat along dim=0 appends the rows of the second tensor, (2, 3) + (2, 3) -> (4, 3)
torch.cat([x, x], dim=0)

tensor([[0, 1, 2],
        [3, 4, 5],
        [0, 1, 2],
        [3, 4, 5]])

In [38]:
# cat along dim=1 appends columns instead: (2, 3) + (2, 3) -> (2, 6)
torch.cat([x, x], dim=1)

tensor([[0, 1, 2, 0, 1, 2],
        [3, 4, 5, 3, 4, 5]])

In [40]:
# Unlike cat, stack creates a new dimension: two (2, 3) tensors -> (2, 2, 3).
describe(torch.stack([x, x], dim=0))

Type: torch.LongTensor
Shape/size: torch.Size([2, 2, 3])
Values: 
tensor([[[0, 1, 2],
         [3, 4, 5]],

        [[0, 1, 2],
         [3, 4, 5]]])


In [41]:
# With dim=1 the new dimension goes second, so matching rows of the two inputs are paired up.
describe(torch.stack([x, x], dim=1))

Type: torch.LongTensor
Shape/size: torch.Size([2, 2, 3])
Values: 
tensor([[[0, 1, 2],
         [0, 1, 2]],

        [[3, 4, 5],
         [3, 4, 5]]])


In [43]:
# Operands for the matrix-multiplication example: x1 is (2, 3) integer, x2 is (3, 2) float.
x1 = torch.arange(6).view(2, 3)

x2 = torch.ones(3, 2)
x2[:, 1] += 1 # broadcasting: the scalar 1 is added to every entry of column 1
describe(x2)

Type: torch.FloatTensor
Shape/size: torch.Size([3, 2])
Values: 
tensor([[1., 2.],
        [1., 2.],
        [1., 2.]])


In [46]:
# x1 comes from arange, so it is integer-typed (LongTensor) while x2 is float.
describe(x1)

Type: torch.LongTensor
Shape/size: torch.Size([2, 3])
Values: 
tensor([[0, 1, 2],
        [3, 4, 5]])


In [45]:
# matrix mult
# x1 is cast to float so both operands share a dtype; (2, 3) @ (3, 2) -> (2, 2).
describe(torch.mm(x1.float(), x2))

Type: torch.FloatTensor
Shape/size: torch.Size([2, 2])
Values: 
tensor([[ 3.,  6.],
        [12., 24.]])


In [47]:
# gradient bookkeeping
# requires_grad=True asks autograd to track operations on x.
x = torch.ones(2, 2, requires_grad=True)
describe(x)
# No backward pass has run yet, so no gradient has been stored on x.
print(x.grad is None)

Type: torch.FloatTensor
Shape/size: torch.Size([2, 2])
Values: 
tensor([[1., 1.],
        [1., 1.]], requires_grad=True)
True


In [48]:
# Forward pass only: y records how it was computed (grad_fn), but x.grad stays unset.
y = (x + 2) * (x + 5) + 3
describe(y)
print(x.grad is None)

Type: torch.FloatTensor
Shape/size: torch.Size([2, 2])
Values: 
tensor([[21., 21.],
        [21., 21.]], grad_fn=<AddBackward0>)
True


In [49]:
# Reduce to a scalar, then backpropagate from it.
z = y.mean()
describe(z)
# backward() is what populates x.grad.
z.backward()
print(x.grad is None)

Type: torch.FloatTensor
Shape/size: torch.Size([])
Values: 
21.0
False


In [50]:
# By hand: z is the mean of (x_i + 2) * (x_i + 5) + 3 over the 4 entries, so
# dz/dx_i = ((x_i + 5) + (x_i + 2)) / 4 = (2 * x_i + 7) / 4 = 2.25 at x_i = 1.
x.grad

tensor([[2.2500, 2.2500],
        [2.2500, 2.2500]])

In [51]:
# y is an intermediate result (it carries a grad_fn), not a leaf like x, so autograd
# does not keep its gradient after backward(). Calling y.retain_grad() before
# z.backward() would be needed to populate it.
y.grad

  y.grad


In [52]:
# TODO: calculate by hand how to get x.grad above, 4 partial derivatives...


### CUDA

Check if it's available

In [53]:
# True only when PyTorch can see a usable CUDA device.
print(torch.cuda.is_available())

False


## Exercises

Create a 2D tensor and then add a dimension of size 1 inserted at dimension 0.

Remove the extra dimension you just added to the previous tensor.

Create a random tensor of shape 5x3 in the interval [3, 7)

Create a tensor with values from a normal distribution (mean=0, std=1).

Retrieve the indexes of all the nonzero elements in the tensor torch.Tensor([1, 1, 1, 0, 1]).

Create a random tensor of size (3,1) and then horizontally stack four copies together.

Return the batch matrix-matrix product of two three-dimensional matrices (a=torch.rand(3,4,5), b=torch.rand(3,5,4)).

Return the batch matrix-matrix product of a 3D matrix and a 2D matrix (a=torch.rand(3,4,5), b=torch.rand(5,4)).

In [29]:
# 1: add a size-1 dimension at position 0 by viewing the (2, 3) tensor as (1, 2, 3)
x = torch.arange(6).view(2, 3)

x.view(1, 2, 3)

tensor([[[0, 1, 2],
         [3, 4, 5]]])

In [30]:
# unsqueeze(0) inserts a size-1 dimension at position 0 without spelling out the full shape.
x.unsqueeze(0)


tensor([[[0, 1, 2],
         [3, 4, 5]]])

In [5]:
# 2: viewing back as (2, 3) drops the size-1 dimension again
x.view(1, 2, 3).view(2, 3)

tensor([[0, 1, 2],
        [3, 4, 5]])

In [31]:
# squeeze(0) removes the size-1 dimension that unsqueeze(0) added.
x.unsqueeze(0).squeeze(0)

tensor([[0, 1, 2],
        [3, 4, 5]])

In [7]:
# 3
# NOTE(review): randint draws integers only (3, 4, 5 or 6). If the exercise wants
# continuous values in [3, 7), 3 + 4 * torch.rand(5, 3) would give them.
torch.randint(3,7, (5,3))

tensor([[5, 6, 6],
        [3, 6, 4],
        [3, 3, 5],
        [3, 6, 3],
        [6, 4, 6]])

In [8]:
# 4: randn draws from the standard normal distribution (mean 0, std 1)

torch.randn(9)

tensor([-0.2537, -0.9215,  0.9621,  1.2693,  0.5032,  0.0571, -0.8009,  1.0839,
         1.5446])

In [36]:
# Alternative for 4: overwrite an existing tensor in place with normal(mean=0, std=1) samples.
a = torch.rand(3, 3)

a.normal_(0, 1)

tensor([[-0.9915,  0.2794, -0.6566],
        [ 0.2720, -1.6217, -0.0867],
        [ 0.0418,  1.3955,  0.2542]])

In [15]:
# 5: one row per nonzero element, each row holding that element's index
x = torch.Tensor([1, 1, 1, 0, 1])

torch.nonzero(x)

tensor([[0],
        [1],
        [2],
        [4]])

In [38]:
# 6
x = torch.rand(3, 1)

# stack inserts a new dimension at position 1, so four (3, 1) copies give (3, 4, 1)
# rather than a flat (3, 4); torch.cat([x] * 4, dim=1) would join them side by side.
torch.stack([x] * 4, dim=1).shape

torch.Size([3, 4, 1])

In [39]:
# Each of the 3 original rows becomes its own 4x1 block holding four copies of that row's value.
torch.stack([x]*4, dim=1)

tensor([[[0.3140],
         [0.3140],
         [0.3140],
         [0.3140]],

        [[0.5019],
         [0.5019],
         [0.5019],
         [0.5019]],

        [[0.0287],
         [0.0287],
         [0.0287],
         [0.0287]]])

In [40]:
# -1 leaves dim 0 unchanged; the size-1 dim 1 is expanded to 4, giving a (3, 4) result.
x.expand(-1, 4)

tensor([[0.3140, 0.3140, 0.3140, 0.3140],
        [0.5019, 0.5019, 0.5019, 0.5019],
        [0.0287, 0.0287, 0.0287, 0.0287]])

In [42]:
# book answer for 6
a = torch.rand(3, 1)

# expand repeats the single column across dim 1; .shape reports the resulting size.
a.expand(3, 4).shape

torch.Size([3, 4])

In [24]:
# 7: batch matrix-matrix product, (3, 4, 5) x (3, 5, 4) -> (3, 4, 4)
a = torch.rand(3, 4, 5)
b = torch.rand(3, 5, 4)

torch.bmm(a, b)

tensor([[[1.0258, 1.1644, 0.9139, 0.7427],
         [1.4088, 1.3895, 1.2815, 0.9656],
         [1.5159, 2.1135, 1.3461, 1.5038],
         [1.4555, 2.0858, 1.2585, 2.0362]],

        [[0.6844, 0.6075, 0.9219, 1.0322],
         [1.0880, 0.4310, 0.6882, 0.8882],
         [1.3135, 0.4166, 1.1533, 1.2595],
         [1.0065, 0.3419, 0.5405, 0.6975]],

        [[1.5759, 1.0123, 1.1776, 0.5544],
         [2.3901, 1.5973, 1.4732, 0.9060],
         [1.4356, 0.6631, 0.9204, 0.2733],
         [1.4036, 0.8689, 0.8653, 0.5686]]])

In [25]:
# The @ operator produces the same batched product as torch.bmm(a, b).
a @ b

tensor([[[1.0258, 1.1644, 0.9139, 0.7427],
         [1.4088, 1.3895, 1.2815, 0.9656],
         [1.5159, 2.1135, 1.3461, 1.5038],
         [1.4555, 2.0858, 1.2585, 2.0362]],

        [[0.6844, 0.6075, 0.9219, 1.0322],
         [1.0880, 0.4310, 0.6882, 0.8882],
         [1.3135, 0.4166, 1.1533, 1.2595],
         [1.0065, 0.3419, 0.5405, 0.6975]],

        [[1.5759, 1.0123, 1.1776, 0.5544],
         [2.3901, 1.5973, 1.4732, 0.9060],
         [1.4356, 0.6631, 0.9204, 0.2733],
         [1.4036, 0.8689, 0.8653, 0.5686]]])

In [43]:
# 8: 3-D times 2-D; @ applies the same (5, 4) matrix b to each of the 3 matrices in a
a = torch.rand(3, 4, 5)
b = torch.rand(5, 4)

a @ b

tensor([[[0.4511, 1.3815, 0.7712, 1.0730],
         [0.8487, 1.3909, 1.3189, 1.5170],
         [1.0298, 1.8820, 1.3994, 1.2546],
         [0.2515, 0.8336, 0.5652, 0.9841]],

        [[1.0597, 1.6672, 1.5299, 1.3734],
         [1.2323, 1.2767, 1.8032, 1.5252],
         [0.9006, 1.1483, 1.2168, 0.8981],
         [0.5011, 1.7527, 0.7966, 1.1407]],

        [[0.6718, 0.8191, 0.9845, 0.9435],
         [0.7925, 1.0501, 1.1183, 0.8475],
         [0.7715, 1.4530, 1.1483, 1.2123],
         [1.0839, 1.8093, 1.7615, 1.9537]]])

In [44]:
# completely missed this one in terms of the operation NOT result
# Explicit version of what @ did implicitly: give b a leading batch dimension and
# expand it to a's batch size, so torch.bmm receives two 3-D tensors.
torch.bmm(a, b.unsqueeze(0).expand(a.size(0), *b.size()))

tensor([[[0.4511, 1.3815, 0.7712, 1.0730],
         [0.8487, 1.3909, 1.3189, 1.5170],
         [1.0298, 1.8820, 1.3994, 1.2546],
         [0.2515, 0.8336, 0.5652, 0.9841]],

        [[1.0597, 1.6672, 1.5299, 1.3734],
         [1.2323, 1.2767, 1.8032, 1.5252],
         [0.9006, 1.1483, 1.2168, 0.8981],
         [0.5011, 1.7527, 0.7966, 1.1407]],

        [[0.6718, 0.8191, 0.9845, 0.9435],
         [0.7925, 1.0501, 1.1183, 0.8475],
         [0.7715, 1.4530, 1.1483, 1.2123],
         [1.0839, 1.8093, 1.7615, 1.9537]]])

In [46]:
# b itself is still the 2-D (5, 4) matrix.
b

tensor([[0.6078, 0.1987, 0.7476, 0.1331],
        [0.0172, 0.7491, 0.3195, 0.9480],
        [0.6359, 0.7049, 0.9775, 0.9920],
        [0.3063, 0.3924, 0.4412, 0.5589],
        [0.1734, 0.9405, 0.1209, 0.0862]])

In [45]:
# unsqueeze(0) adds a batch dimension; expand repeats b along it a.size(0) times,
# giving three identical copies of b.
b.unsqueeze(0).expand(a.size(0), *b.size())

tensor([[[0.6078, 0.1987, 0.7476, 0.1331],
         [0.0172, 0.7491, 0.3195, 0.9480],
         [0.6359, 0.7049, 0.9775, 0.9920],
         [0.3063, 0.3924, 0.4412, 0.5589],
         [0.1734, 0.9405, 0.1209, 0.0862]],

        [[0.6078, 0.1987, 0.7476, 0.1331],
         [0.0172, 0.7491, 0.3195, 0.9480],
         [0.6359, 0.7049, 0.9775, 0.9920],
         [0.3063, 0.3924, 0.4412, 0.5589],
         [0.1734, 0.9405, 0.1209, 0.0862]],

        [[0.6078, 0.1987, 0.7476, 0.1331],
         [0.0172, 0.7491, 0.3195, 0.9480],
         [0.6359, 0.7049, 0.9775, 0.9920],
         [0.3063, 0.3924, 0.4412, 0.5589],
         [0.1734, 0.9405, 0.1209, 0.0862]]])