<a href="https://colab.research.google.com/github/hookskl/nlp_w_pytorch/blob/main/nlp_w_pytorch_ch1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Installing Pytorch

In [1]:
## conda install pytorch torchvision -c pytorch

## Creating tensors

In [2]:
## helper function
def describe(x):
  print("Type: {}".format(x.type()))
  print("Shape/size: {}".format(x.shape))
  print("Values: \n{}".format(x))

*Example 1-3. Creating a tensor in PyTorch with torch.Tensor*

In [3]:
import torch
describe(torch.Tensor(2, 3))

Type: torch.FloatTensor
Shape/size: torch.Size([2, 3])
Values: 
tensor([[4.2898e-35, 0.0000e+00, 3.7835e-44],
        [0.0000e+00,        nan, 0.0000e+00]])


*Example 1-4. Creating a randomly initialized tensor*

In [4]:
describe(torch.rand(2, 3)) # random uniform [0, 1)
print("\n")
print("--------------------------------------")
print("\n")
describe(torch.randn(2, 3)) # random std normal

Type: torch.FloatTensor
Shape/size: torch.Size([2, 3])
Values: 
tensor([[0.0896, 0.8382, 0.8412],
        [0.8584, 0.0186, 0.7882]])


--------------------------------------


Type: torch.FloatTensor
Shape/size: torch.Size([2, 3])
Values: 
tensor([[-0.0506, -0.4976, -0.1008],
        [-0.4518, -0.9404,  0.7146]])


*Example 1-5. Creating a filled tensor*



In [5]:
describe(torch.zeros(2, 3))
x = torch.ones(2, 3)
print("\n")
print("--------------------------------------")
print("\n")
describe(x)
x.fill_(5) ## methods with underscore "_" is the convention for modifying tensors in place
print("\n")
print("--------------------------------------")
print("\n")
describe(x)

Type: torch.FloatTensor
Shape/size: torch.Size([2, 3])
Values: 
tensor([[0., 0., 0.],
        [0., 0., 0.]])


--------------------------------------


Type: torch.FloatTensor
Shape/size: torch.Size([2, 3])
Values: 
tensor([[1., 1., 1.],
        [1., 1., 1.]])


--------------------------------------


Type: torch.FloatTensor
Shape/size: torch.Size([2, 3])
Values: 
tensor([[5., 5., 5.],
        [5., 5., 5.]])


*Example 1-6. Creating and intializing a tensor from lists*



In [6]:
x = torch.Tensor([[1, 2, 3],
                  [4, 5, 6]])
describe(x)

Type: torch.FloatTensor
Shape/size: torch.Size([2, 3])
Values: 
tensor([[1., 2., 3.],
        [4., 5., 6.]])


*Example 1-7. Creating and initializing a tensor from NumPy*

In [7]:
import numpy as np

npy = np.random.rand(2, 3)
describe(torch.from_numpy(npy)) # note the type DoubleTensor (not default FloatTensor)

Type: torch.DoubleTensor
Shape/size: torch.Size([2, 3])
Values: 
tensor([[0.7864, 0.0132, 0.8298],
        [0.9465, 0.4658, 0.2705]], dtype=torch.float64)


## Tensor Types and Size

*Example 1-8. Tensor properties*

In [8]:
x = torch.FloatTensor([[1, 2, 3],
                       [4, 5, 6]])
describe(x)
print("\n")
print("--------------------------------------")
print("\n")
x = x.long()
describe(x)
print("\n")
print("--------------------------------------")
print("\n")
x = torch.tensor([[1, 2, 3],
                  [4, 5, 6]], dtype=torch.int64)
describe(x)
print("\n")
print("--------------------------------------")
print("\n")
x = x.float()
describe(x)

Type: torch.FloatTensor
Shape/size: torch.Size([2, 3])
Values: 
tensor([[1., 2., 3.],
        [4., 5., 6.]])


--------------------------------------


Type: torch.LongTensor
Shape/size: torch.Size([2, 3])
Values: 
tensor([[1, 2, 3],
        [4, 5, 6]])


--------------------------------------


Type: torch.LongTensor
Shape/size: torch.Size([2, 3])
Values: 
tensor([[1, 2, 3],
        [4, 5, 6]])


--------------------------------------


Type: torch.FloatTensor
Shape/size: torch.Size([2, 3])
Values: 
tensor([[1., 2., 3.],
        [4., 5., 6.]])


## Tensor Operations

*Example 1-9. Tensor operations: addition*



In [9]:
x = torch.randn(2, 3)
describe(x)
print("\n")
print("--------------------------------------")
print("\n")
describe(torch.add(x, x))
print("\n")
print("--------------------------------------")
print("\n")
describe(x + x)

Type: torch.FloatTensor
Shape/size: torch.Size([2, 3])
Values: 
tensor([[-0.0784, -0.1302, -1.3039],
        [ 1.0124, -0.1770, -0.5693]])


--------------------------------------


Type: torch.FloatTensor
Shape/size: torch.Size([2, 3])
Values: 
tensor([[-0.1568, -0.2605, -2.6078],
        [ 2.0247, -0.3540, -1.1386]])


--------------------------------------


Type: torch.FloatTensor
Shape/size: torch.Size([2, 3])
Values: 
tensor([[-0.1568, -0.2605, -2.6078],
        [ 2.0247, -0.3540, -1.1386]])


*Example 1-10. Dimension-based tensor operations*

In [10]:
x = torch.arange(6)
describe(x)
print("\n")
print("--------------------------------------")
print("\n")
x = x.view(2, 3)
describe(x)
print("\n")
print("--------------------------------------")
print("\n")
describe(torch.sum(x, dim=0))
print("\n")
print("--------------------------------------")
print("\n")
describe(torch.sum(x, dim=1))
print("\n")
print("--------------------------------------")
print("\n")
describe(torch.transpose(x, 0, 1)) # swap rows with columns

Type: torch.LongTensor
Shape/size: torch.Size([6])
Values: 
tensor([0, 1, 2, 3, 4, 5])


--------------------------------------


Type: torch.LongTensor
Shape/size: torch.Size([2, 3])
Values: 
tensor([[0, 1, 2],
        [3, 4, 5]])


--------------------------------------


Type: torch.LongTensor
Shape/size: torch.Size([3])
Values: 
tensor([3, 5, 7])


--------------------------------------


Type: torch.LongTensor
Shape/size: torch.Size([2])
Values: 
tensor([ 3, 12])


--------------------------------------


Type: torch.LongTensor
Shape/size: torch.Size([3, 2])
Values: 
tensor([[0, 3],
        [1, 4],
        [2, 5]])


## Indexing, Slicing, and Joining

*Example 1-11. Slicing and indexing a tensor*

In [11]:
x = torch.arange(6).view(2, 3)
describe(x)
print("\n")
print("--------------------------------------")
print("\n")
describe(x[:1, :2])
print("\n")
print("--------------------------------------")
print("\n")
describe(x[0, 1])

Type: torch.LongTensor
Shape/size: torch.Size([2, 3])
Values: 
tensor([[0, 1, 2],
        [3, 4, 5]])


--------------------------------------


Type: torch.LongTensor
Shape/size: torch.Size([1, 2])
Values: 
tensor([[0, 1]])


--------------------------------------


Type: torch.LongTensor
Shape/size: torch.Size([])
Values: 
1


*Example 1-12. Complex indexing: noncontiguous indexing of a tensor*

In [12]:
indices = torch.LongTensor([0, 2]) # LongTensor type required for indices
describe(torch.index_select(x, dim=1, index=indices))
print("\n")
print("--------------------------------------")
print("\n")
indices = torch.LongTensor([0, 0])
describe(torch.index_select(x, dim=0, index=indices))
print("\n")
print("--------------------------------------")
print("\n")
row_indices = torch.arange(2).long()
col_indices = torch.LongTensor([0, 1])
describe(x[row_indices, col_indices])

Type: torch.LongTensor
Shape/size: torch.Size([2, 2])
Values: 
tensor([[0, 2],
        [3, 5]])


--------------------------------------


Type: torch.LongTensor
Shape/size: torch.Size([2, 3])
Values: 
tensor([[0, 1, 2],
        [0, 1, 2]])


--------------------------------------


Type: torch.LongTensor
Shape/size: torch.Size([2])
Values: 
tensor([0, 4])


*Example 1-13. Concatenating tensors*

In [13]:
x = torch.arange(6).view(2, 3)
describe(x)
print("\n")
print("--------------------------------------")
print("\n")
describe(torch.cat([x, x], dim=0))
print("\n")
print("--------------------------------------")
print("\n")
describe(torch.cat([x, x], dim=1))
print("\n")
print("--------------------------------------")
print("\n")
describe(torch.stack([x, x]))

Type: torch.LongTensor
Shape/size: torch.Size([2, 3])
Values: 
tensor([[0, 1, 2],
        [3, 4, 5]])


--------------------------------------


Type: torch.LongTensor
Shape/size: torch.Size([4, 3])
Values: 
tensor([[0, 1, 2],
        [3, 4, 5],
        [0, 1, 2],
        [3, 4, 5]])


--------------------------------------


Type: torch.LongTensor
Shape/size: torch.Size([2, 6])
Values: 
tensor([[0, 1, 2, 0, 1, 2],
        [3, 4, 5, 3, 4, 5]])


--------------------------------------


Type: torch.LongTensor
Shape/size: torch.Size([2, 2, 3])
Values: 
tensor([[[0, 1, 2],
         [3, 4, 5]],

        [[0, 1, 2],
         [3, 4, 5]]])


*Example 1-14. Linear algebra on tensors: multiplication*

In [14]:
x1 = torch.arange(6).view(2, 3).float()
describe(x1)
print("\n")
print("--------------------------------------")
print("\n")
x2 = torch.ones(3, 2)
x2[:, 1] += 1
describe(x2)
print("\n")
print("--------------------------------------")
print("\n")
describe(torch.mm(x1, x2))

Type: torch.FloatTensor
Shape/size: torch.Size([2, 3])
Values: 
tensor([[0., 1., 2.],
        [3., 4., 5.]])


--------------------------------------


Type: torch.FloatTensor
Shape/size: torch.Size([3, 2])
Values: 
tensor([[1., 2.],
        [1., 2.],
        [1., 2.]])


--------------------------------------


Type: torch.FloatTensor
Shape/size: torch.Size([2, 2])
Values: 
tensor([[ 3.,  6.],
        [12., 24.]])


## Tensors and Computational Graphs

*Example 1-15. Creating tensors for gradient bookkeeping*

In [15]:
x = torch.ones(2, 2, requires_grad=True) # requires_grad=True means PyTorch will track info to compute gradients
describe(x)
print(x.grad is None)
print("\n")
print("--------------------------------------")
print("\n")
y = (x + 2) * (x + 5) + 3
describe(y)
print(x.grad is None)
print("\n")
print("--------------------------------------")
print("\n")
z = y.mean()
describe(z)
z.backward()
print(x.grad is None)

Type: torch.FloatTensor
Shape/size: torch.Size([2, 2])
Values: 
tensor([[1., 1.],
        [1., 1.]], requires_grad=True)
True


--------------------------------------


Type: torch.FloatTensor
Shape/size: torch.Size([2, 2])
Values: 
tensor([[21., 21.],
        [21., 21.]], grad_fn=<AddBackward0>)
True


--------------------------------------


Type: torch.FloatTensor
Shape/size: torch.Size([])
Values: 
21.0
False


## CUDA Tensors

*Example 1-16. Creating CUDA tensors*

In [17]:
print(torch.cuda.is_available())

# preferred method: device agnostic tensor instantiation
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("\n")
print("--------------------------------------")
print("\n")
print(device)
print("\n")
print("--------------------------------------")
print("\n")
x = torch.rand(3, 3).to(device)
describe(x)

True


--------------------------------------


cuda


--------------------------------------


Type: torch.cuda.FloatTensor
Shape/size: torch.Size([3, 3])
Values: 
tensor([[0.5651, 0.7647, 0.2521],
        [0.0660, 0.7847, 0.2272],
        [0.6024, 0.6149, 0.7954]], device='cuda:0')


*Example 1-17. Mixing CUDA tensors with CPU-bound tensors*

In [18]:
y = torch.rand(3, 3) # cpu-bound
x + y # causes error

RuntimeError: ignored

In [19]:
cpu_device = torch.device("cpu") # it's more costly to move to gpu, so generally tensors are moved to cpu when gpu is not needed
y = y.to(cpu_device)
x = x.to(cpu_device)
x + y

tensor([[0.8148, 1.5207, 1.1395],
        [0.6034, 1.4715, 0.7976],
        [1.5261, 0.7353, 1.6350]])

## Exercises

1. Create a 2D tensor and then add a dimension of size 1 inserted at dimension 0.

In [22]:
x = torch.rand(2, 2)
describe(x)
print("\n")
print("--------------------------------------")
print("\n")
describe(x.unsqueeze_(0))

Type: torch.FloatTensor
Shape/size: torch.Size([2, 2])
Values: 
tensor([[0.0584, 0.1167],
        [0.6345, 0.9847]])


--------------------------------------


Type: torch.FloatTensor
Shape/size: torch.Size([1, 2, 2])
Values: 
tensor([[[0.0584, 0.1167],
         [0.6345, 0.9847]]])


2. Remove the extra dimension you just added to the previous tensor.

In [23]:
describe(x)
print("\n")
print("--------------------------------------")
print("\n")
describe(x.squeeze_(0))

Type: torch.FloatTensor
Shape/size: torch.Size([1, 2, 2])
Values: 
tensor([[[0.0584, 0.1167],
         [0.6345, 0.9847]]])


--------------------------------------


Type: torch.FloatTensor
Shape/size: torch.Size([2, 2])
Values: 
tensor([[0.0584, 0.1167],
        [0.6345, 0.9847]])


3. Create a random tensor of shape 5x3 in the interval [3, 7)

In [25]:
x = torch.rand(3, 5)
describe(x)
print("\n")
print("--------------------------------------")
print("\n")
describe(3 + x * (7 - 3))

Type: torch.FloatTensor
Shape/size: torch.Size([3, 5])
Values: 
tensor([[0.2580, 0.2639, 0.8189, 0.1889, 0.7132],
        [0.6985, 0.9447, 0.2736, 0.7299, 0.4296],
        [0.4488, 0.0205, 0.4993, 0.9360, 0.9743]])


--------------------------------------


Type: torch.FloatTensor
Shape/size: torch.Size([3, 5])
Values: 
tensor([[4.0321, 4.0558, 6.2755, 3.7558, 5.8530],
        [5.7940, 6.7786, 4.0942, 5.9196, 4.7185],
        [4.7951, 3.0820, 4.9971, 6.7441, 6.8972]])


4. Create a tensor with values from a std normal distrbution

In [26]:
torch.randn(2, 3)

tensor([[-0.8240, -0.9471, -0.4138],
        [-1.5695, -0.9133, -0.4082]])

5. Retrieve the indexes of all the nonzero elements in the tensor `torch.Tensor([1, 1, 1, 0, 1])`

In [29]:
x = torch.Tensor([1, 1, 1, 0, 1])
torch.nonzero(x)

tensor([[0],
        [1],
        [2],
        [4]])

6. Create a random tensor of size (3, 1) and then horizontally stack four copies together

In [33]:
x = torch.rand(3, 1)
describe(x)
print("\n")
print("--------------------------------------")
print("\n")
describe(torch.cat([x, x, x, x], dim=1))
print("\n")
print("--------------------------------------")
print("\n")
describe(x.expand(3, 4))

Type: torch.FloatTensor
Shape/size: torch.Size([3, 1])
Values: 
tensor([[0.1177],
        [0.0529],
        [0.8265]])


--------------------------------------


Type: torch.FloatTensor
Shape/size: torch.Size([3, 4])
Values: 
tensor([[0.1177, 0.1177, 0.1177, 0.1177],
        [0.0529, 0.0529, 0.0529, 0.0529],
        [0.8265, 0.8265, 0.8265, 0.8265]])


--------------------------------------


Type: torch.FloatTensor
Shape/size: torch.Size([3, 4])
Values: 
tensor([[0.1177, 0.1177, 0.1177, 0.1177],
        [0.0529, 0.0529, 0.0529, 0.0529],
        [0.8265, 0.8265, 0.8265, 0.8265]])


7. Return the batch matrix-matrix product of two three-dimensional matrices `(a=torch.rand(3,4,5), b=torch.rand(3,5,4))`

In [36]:
a = torch.rand(3, 4, 5)
b = torch.rand(3, 5, 4)

describe(torch.bmm(a, b))

Type: torch.FloatTensor
Shape/size: torch.Size([3, 4, 4])
Values: 
tensor([[[1.3284, 1.4105, 1.7711, 1.5589],
         [0.9383, 1.2593, 1.5796, 0.9207],
         [1.6320, 1.6876, 2.2987, 1.4019],
         [1.2756, 0.6339, 1.3705, 0.8241]],

        [[0.6688, 0.7346, 1.2532, 1.6280],
         [0.4803, 0.6169, 1.1120, 1.1937],
         [0.5746, 0.8709, 1.5364, 1.6537],
         [0.7007, 1.4170, 1.5839, 2.0052]],

        [[1.1008, 1.1324, 1.8727, 0.9075],
         [1.2670, 1.2187, 2.4290, 1.5998],
         [1.1604, 1.1897, 2.0142, 1.0410],
         [1.4365, 1.4377, 2.2139, 1.5145]]])


8. Return the batch matrix-matrix product of a 3D matrix and a 2D matrix `(a=torch.rand(3, 4, 5), b=torch.rand(5, 4))`

In [41]:
a = torch.rand(3, 4, 5)
b = torch.rand(5, 4)

torch.bmm(a, b.unsqueeze(0).expand(a.size(0), *b.size()))

tensor([[[1.5475, 0.7278, 1.7180, 1.1074],
         [1.7764, 0.6134, 1.5897, 0.5841],
         [1.9283, 1.1113, 2.1924, 1.1190],
         [1.4988, 0.7145, 1.6317, 0.6600]],

        [[1.3768, 0.6847, 1.5956, 0.8898],
         [0.7888, 0.5512, 1.2090, 0.5307],
         [0.5927, 0.2949, 0.9706, 0.7381],
         [2.0606, 0.9512, 2.5210, 1.6165]],

        [[1.6639, 0.8169, 2.0930, 1.1946],
         [1.5292, 0.6741, 1.5519, 0.7521],
         [1.7463, 0.7795, 1.6878, 0.7874],
         [1.9922, 0.6543, 1.8142, 1.0579]]])