# Introduction to PyTorch Tensors

In [2]:
import torch
import math

## Creating Tensors

In [3]:
# torch.empty only allocates the 3x4 tensor; it does not initialise the memory,
# so the values printed are whatever that memory happened to contain.
x = torch.empty(3, 4)
print(type(x), x, sep="\n")

<class 'torch.Tensor'>
tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])


In [4]:
# Factory functions for tensors filled with a constant value
zeros = torch.zeros(2, 3)
ones = torch.ones(2, 3)
print(zeros, ones, sep="\n")

# Seed the generator first so the random tensor is reproducible
torch.manual_seed(1729)
random = torch.rand(2, 3)
print(random)

tensor([[0., 0., 0.],
        [0., 0., 0.]])
tensor([[1., 1., 1.],
        [1., 1., 1.]])
tensor([[0.3126, 0.3791, 0.3087],
        [0.0736, 0.4216, 0.0691]])


### Random Tensors and Seeding

In [5]:
# Seeding the generator makes the sequence of random tensors reproducible:
# the first draw after each manual_seed(1729) is the same, and so is the second.
torch.manual_seed(1729)
rand1 = torch.rand(2, 3)
print(rand1)

rand2 = torch.rand(2, 3)
print(rand2)

torch.manual_seed(1729)  # seed has been reset
rand3 = torch.rand(2, 3)
print(rand3)

rand4 = torch.rand(2, 3)  # was bound to `random4`, so print(rand4) raised NameError
print(rand4)

# Make the point of the cell explicit and checkable on every re-run
assert torch.equal(rand1, rand3)
assert torch.equal(rand2, rand4)

tensor([[0.3126, 0.3791, 0.3087],
        [0.0736, 0.4216, 0.0691]])
tensor([[0.2332, 0.4047, 0.2162],
        [0.9927, 0.4128, 0.5938]])
tensor([[0.3126, 0.3791, 0.3087],
        [0.0736, 0.4216, 0.0691]])
tensor([[0.2332, 0.4047, 0.2162],
        [0.9927, 0.4128, 0.5938]])


### Tensor Shapes

In [6]:
# A 3-dimensional tensor, then four tensors created with the same shape via
# the *_like factory functions. Each one is shown as its shape, then its values.
x = torch.empty(2, 2, 3)
print(x.shape, x, sep="\n")

empty_like_x = torch.empty_like(x)
print(empty_like_x.shape, empty_like_x, sep="\n")

zeros_like_x = torch.zeros_like(x)
print(zeros_like_x.shape, zeros_like_x, sep="\n")

ones_like_x = torch.ones_like(x)
print(ones_like_x.shape, ones_like_x, sep="\n")

rand_like_x = torch.rand_like(x)
print(rand_like_x.shape, rand_like_x, sep="\n")

torch.Size([2, 2, 3])
tensor([[[0., 0., 0.],
         [0., 0., 0.]],

        [[0., 0., 0.],
         [0., 0., 0.]]])
torch.Size([2, 2, 3])
tensor([[[0., 0., 0.],
         [0., 0., 0.]],

        [[0., 0., 0.],
         [0., 0., 0.]]])
torch.Size([2, 2, 3])
tensor([[[0., 0., 0.],
         [0., 0., 0.]],

        [[0., 0., 0.],
         [0., 0., 0.]]])
torch.Size([2, 2, 3])
tensor([[[1., 1., 1.],
         [1., 1., 1.]],

        [[1., 1., 1.],
         [1., 1., 1.]]])
torch.Size([2, 2, 3])
tensor([[[0.6128, 0.1519, 0.0453],
         [0.5035, 0.9978, 0.3884]],

        [[0.6929, 0.1703, 0.1384],
         [0.4759, 0.7481, 0.0361]]])


In [43]:
# torch.tensor builds a tensor directly from (nested) Python sequences;
# lists and tuples can be mixed freely.
some_constants = torch.tensor([[3.1415926, 2.71828], [1.61803, 0.0072897]])
print(some_constants)

some_ints = torch.tensor((2, 3, 5, 7, 11, 13, 17, 19))
print(some_ints)

more_ints = torch.tensor(((2, 4, 6), [3, 6, 9]))
print(more_ints)

# torch.tensor creates a copy of the data
data = [[1,2,3], [4,5,6]]
copied = torch.tensor(data)  # keep the result (it used to be discarded) so the copy can be shown
copied[0][0] = 99            # changing the tensor...
print(data)                  # ...leaves the source list untouched

tensor([[3.1416, 2.7183],
        [1.6180, 0.0073]])
tensor([ 2,  3,  5,  7, 11, 13, 17, 19])
tensor([[2, 4, 6],
        [3, 6, 9]])
[[1, 2, 3], [4, 5, 6]]


### Tensor Data Types

In [11]:
# The dtype can be chosen at creation time...
a = torch.ones(2, 3, dtype=torch.int16)
print(a)

b = 20. * torch.rand(2, 3, dtype=torch.float64)
print(b)

# ...or changed afterwards with .to(); the float -> int conversion drops the fraction
c = b.to(dtype=torch.int32)
print(c)

tensor([[1, 1, 1],
        [1, 1, 1]], dtype=torch.int16)
tensor([[11.2406, 11.2083, 11.6692],
        [18.3283,  0.2118, 18.4972]], dtype=torch.float64)
tensor([[11, 11, 11],
        [18,  0, 18]], dtype=torch.int32)


### Maths & Logic with PyTorch Tensors

In [12]:
# Arithmetic between a tensor and a scalar is applied to every element
ones = 1 + torch.zeros(2, 2)
twos = 2 * torch.ones(2, 2)
threes = (7 * torch.ones(2, 2) - 1) / 2
fours = twos ** 2
sqrt2s = twos ** 0.5

print(ones, twos, threes, fours, sqrt2s, sep="\n")

tensor([[1., 1.],
        [1., 1.]])
tensor([[2., 2.],
        [2., 2.]])
tensor([[3., 3.],
        [3., 3.]])
tensor([[4., 4.],
        [4., 4.]])
tensor([[1.4142, 1.4142],
        [1.4142, 1.4142]])


In [13]:
# Arithmetic between two tensors of the same shape works element by element.
# ones, twos, threes and fours come from the previous cell.
powers2 = twos ** torch.tensor([[1, 2], [3, 4]])
fives = ones + fours
dozens = threes * fours

print(powers2, fives, dozens, sep="\n")

tensor([[ 2.,  4.],
        [ 8., 16.]])
tensor([[5., 5.],
        [5., 5.]])
tensor([[12., 12.],
        [12., 12.]])


It’s important to note here that all of the tensors in the previous code cell were of identical shape. What happens when we try to perform a binary operation on tensors of dissimilar shape?

In [15]:
# Shapes (2, 3) and (3, 2) are not compatible for an element-wise product
a, b = torch.rand(2, 3), torch.rand(3, 2)

try:
    product = a * b
except RuntimeError as err:
    print("RuntimeError:", err)
else:
    print(product)

RuntimeError: The size of tensor a (3) must match the size of tensor b (2) at non-singleton dimension 1


### In Brief: Tensor Broadcasting

The exception to the same-shapes rule is *tensor broadcasting*.

In [3]:
# A (1, 4) tensor of twos is multiplied against each row of the (2, 4) tensor
rand = torch.rand(2, 4)
doubled = rand * (torch.ones(1, 4) * 2)

print(rand, doubled, sep="\n")

tensor([[0.7178, 0.0287, 0.4166, 0.4081],
        [0.0356, 0.8657, 0.7561, 0.1453]])
tensor([[1.4356, 0.0575, 0.8333, 0.8162],
        [0.0712, 1.7315, 1.5122, 0.2905]])


Broadcasting is a way to perform an operation between tensors that have similarities in their shapes. In the example above, the 1-row, 4-column tensor is multiplied by *both rows* of the 2-row, 4-column tensor.

The rules for broadcasting are:
* Each tensor must have at least one dimension - no empty tensors.
* Comparing the dimension sizes of the two tensors, *going from last to first:*
    * Each dimension must be equal, *or*
    * One of the dimensions must be of size 1, *or*
    * The dimension does not exist in one of the tensors

In [3]:
# Three shapes that broadcast against a's (4, 3, 2); shapes are compared from
# the last dimension backwards.
a =     torch.ones(4, 3, 2)

b = a * torch.rand(   3, 2)  # dim 2 & 3 same as a, dim 1 absent
c = a * torch.rand(   3, 1)  # dim 3 = 1, dim 2 same as a
d = a * torch.rand(   1, 2)  # dim 3 same as a, dim 2 = 1

print(b, c, d, sep="\n")

tensor([[[0.2742, 0.2243],
         [0.4751, 0.3848],
         [0.0053, 0.5475]],

        [[0.2742, 0.2243],
         [0.4751, 0.3848],
         [0.0053, 0.5475]],

        [[0.2742, 0.2243],
         [0.4751, 0.3848],
         [0.0053, 0.5475]],

        [[0.2742, 0.2243],
         [0.4751, 0.3848],
         [0.0053, 0.5475]]])
tensor([[[0.2765, 0.2765],
         [0.0632, 0.0632],
         [0.9048, 0.9048]],

        [[0.2765, 0.2765],
         [0.0632, 0.0632],
         [0.9048, 0.9048]],

        [[0.2765, 0.2765],
         [0.0632, 0.0632],
         [0.9048, 0.9048]],

        [[0.2765, 0.2765],
         [0.0632, 0.0632],
         [0.9048, 0.9048]]])
tensor([[[0.1875, 0.3266],
         [0.1875, 0.3266],
         [0.1875, 0.3266]],

        [[0.1875, 0.3266],
         [0.1875, 0.3266],
         [0.1875, 0.3266]],

        [[0.1875, 0.3266],
         [0.1875, 0.3266],
         [0.1875, 0.3266]],

        [[0.1875, 0.3266],
         [0.1875, 0.3266],
         [0.1875, 0.3266]]])


In [6]:
a =         torch.ones(4, 3, 2)

# Each of these shapes breaks the broadcasting rules against a's (4, 3, 2),
# so every multiplication raises and only the error message is printed.
for bad_shape in (
    (4, 3),  # dimensions must match last-to-first
    (2, 3),  # both 3rd & 2nd dims different
    (0,),    # can't broadcast with an empty tensor
):
    try:
        a * torch.rand(bad_shape)
    except RuntimeError as err:
        print(err)

The size of tensor a (2) must match the size of tensor b (3) at non-singleton dimension 2
The size of tensor a (2) must match the size of tensor b (3) at non-singleton dimension 2
The size of tensor a (2) must match the size of tensor b (0) at non-singleton dimension 2


### More Maths with Tensors

In [14]:
# Common functions, applied element-wise to values drawn from [-1, 1)
a = torch.rand(2, 4) * 2 - 1
print("Common functions:")
print(
    torch.abs(a),
    torch.ceil(a),
    torch.floor(a),
    torch.clamp(a, min=-0.5, max=0.5),
    sep="\n",
)

# Trig functions: asin only returns angles in [-pi/2, pi/2], so the last
# angle (3*pi/4) does not round-trip
angles = torch.tensor([k * math.pi / 4 for k in range(4)])
sines = torch.sin(angles)
inverses = torch.asin(sines)
print('\nSine and arcsine:')
print(angles, sines, inverses, sep="\n")

# Bitwise operations on integer tensors
print('\nBitwise XOR:')
b = torch.tensor([1, 5, 11])
c = torch.tensor([2, 7, 10])
print(torch.bitwise_xor(b, c))

Common functions:
tensor([[0.0820, 0.7429, 0.8884, 0.4138],
        [0.5483, 0.8741, 0.2812, 0.2862]])
tensor([[1., -0., 1., 1.],
        [-0., -0., 1., -0.]])
tensor([[ 0., -1.,  0.,  0.],
        [-1., -1.,  0., -1.]])
tensor([[ 0.0820, -0.5000,  0.5000,  0.4138],
        [-0.5000, -0.5000,  0.2812, -0.2862]])

Sine and arcsine:
tensor([0.0000, 0.7854, 1.5708, 2.3562])
tensor([0.0000, 0.7071, 1.0000, 0.7071])
tensor([0.0000, 0.7854, 1.5708, 0.7854])

Bitwise XOR:
tensor([3, 2, 1])


In [15]:
# Comparisons
print('\nBroadcasted, element-wise equality comparison:')
d = torch.tensor([[1., 2.], [3., 4.]])
e = torch.ones(1, 2)   # many comparison ops support broadcasting!
print(torch.eq(d, e))  # returns tensor of type bool

# Vector and linear algebra
v1 = torch.tensor([1., 0., 0.])          # x unit vector
v2 = torch.tensor([0., 1., 0.])          # y unit vector
m1 = torch.rand(2, 2)                    # random matrix
m2 = torch.tensor([[3., 0.], [0., 3.]])  # three times identity matrix

print('\nVectors & Matrices:')
# dim is passed explicitly: calling torch.cross without it is deprecated and
# falls back to "the first dimension of size 3", which is easy to get wrong.
print(torch.cross(v2, v1, dim=0))  # negative of z unit vector (v1 x v2 == -v2 x v1)
print(m1)
m3 = torch.matmul(m1, m2)
print(m3)                   # 3 times m1
# NOTE: torch.svd is deprecated in favour of torch.linalg.svd, which returns Vh
# (the transpose of V). It is kept here so the printed result keeps its U/S/V form.
print(torch.svd(m3))        # singular value decomposition


Broadcasted, element-wise equality comparison:
tensor([[ True, False],
        [False, False]])

Vectors & Matrices:
tensor([ 0.,  0., -1.])
tensor([[0.4818, 0.2718],
        [0.2758, 0.1531]])
tensor([[1.4453, 0.8154],
        [0.8274, 0.4593]])
torch.return_types.svd(
U=tensor([[-0.8687, -0.4954],
        [-0.4954,  0.8687]]),
S=tensor([1.9103, 0.0057]),
V=tensor([[-0.8718,  0.4899],
        [-0.4899, -0.8718]]))


### Altering Tensors in Place

There are times when we may wish to alter a tensor in place. For this, most of the math functions have a version with an appended underscore ( `_` ) that will alter a tensor in place.

In [16]:
# Out-of-place: torch.sin returns a new tensor and leaves its input alone
a = torch.tensor([k * math.pi / 4 for k in range(4)])
print("a:")
print(a, torch.sin(a), a, sep="\n")  # a is identical before and after

# In-place: the trailing underscore overwrites the tensor it is called on
b = torch.tensor([k * math.pi / 4 for k in range(4)])
print("\nb:")
print(b)
print(b.sin_())  # added underscore
print(b)         # b changed

a:
tensor([0.0000, 0.7854, 1.5708, 2.3562])
tensor([0.0000, 0.7071, 1.0000, 0.7071])
tensor([0.0000, 0.7854, 1.5708, 2.3562])

b:
tensor([0.0000, 0.7854, 1.5708, 2.3562])
tensor([0.0000, 0.7071, 1.0000, 0.7071])
tensor([0.0000, 0.7071, 1.0000, 0.7071])


In [17]:
# In-place arithmetic: add_ and mul_ overwrite the tensor they are called on
# and return that same tensor.
a = torch.ones(2, 2)
b = torch.rand(2, 2)

print("Before:")
print(a)
print(b)

print("\nAfter adding:")
print(a.add_(b))  # a now holds a + b
print(a)
print(b)          # b is only read here, so it is unchanged

print("\nAfter multiplying:")
print(b.mul_(b))  # b is squared element-wise, in place
print(b)          # restored from a commented-out line: shows that b itself changed

Before:
tensor([[1., 1.],
        [1., 1.]])
tensor([[0.0713, 0.0398],
        [0.8330, 0.0417]])

After adding:
tensor([[1.0713, 1.0398],
        [1.8330, 1.0417]])
tensor([[1.0713, 1.0398],
        [1.8330, 1.0417]])
tensor([[0.0713, 0.0398],
        [0.8330, 0.0417]])

After multiplying:
tensor([[0.0051, 0.0016],
        [0.6939, 0.0017]])
tensor([[0.0051, 0.0016],
        [0.6939, 0.0017]])


In [20]:
# Many of the methods and functions seen so far have an
# `out` argument to specify the tensor that receives the output

a = torch.rand(2, 2)
b = torch.rand(2, 2)
c = torch.zeros(2, 2)
old_id = id(c)

print(c)
d = torch.matmul(a, b, out=c)
print(c)  # c contents changed

assert c is d           # test c & d are same obj, not just equal in val
# `assert id(c), old_id` treated old_id as the failure message and could never
# fail; compare the two ids instead.
assert id(c) == old_id  # make sure new c is the same obj as the old one

torch.rand(2, 2, out=c)
print(c)  # c changed again
assert id(c) == old_id

tensor([[0., 0.],
        [0., 0.]])
tensor([[0.3058, 0.3135],
        [0.7309, 0.7532]])
tensor([[0.0200, 0.6959],
        [0.1177, 0.4369]])


## Copying Tensors

In [21]:
# Assignment does not copy a tensor: as with any Python object, it only binds
# a second name to the same object.

a = torch.ones(2, 2)
b = a

a[0, 1] = 561  # writing through a...
print(b)       # ...is visible through b as well

tensor([[  1., 561.],
        [  1.,   1.]])


In [22]:
# clone() gives an independent copy of the data

a = torch.ones(2, 2)
b = torch.clone(a)

assert b is not a  # two distinct objects in memory...
print(a == b)      # ...holding equal values

a[0, 1] = 561  # modifying a
print(b)       # ...leaves the clone as it was

tensor([[True, True],
        [True, True]])
tensor([[1., 1.],
        [1., 1.]])


In [24]:
# Cloning a tensor that tracks gradients: a plain clone() stays attached to the
# autograd graph, while detach().clone() gives a copy that does not track gradients.

a = torch.rand(2, 2).requires_grad_()  # turn on autograd
b = a.clone()                          # printed with a grad_fn
c = a.detach().clone()                 # printed with neither grad_fn nor requires_grad

print(a, b, c, a, sep="\n")            # a is shown again last: it is unaffected

tensor([[0.2196, 0.5041],
        [0.7555, 0.4534]], requires_grad=True)
tensor([[0.2196, 0.5041],
        [0.7555, 0.4534]], grad_fn=<CloneBackward0>)
tensor([[0.2196, 0.5041],
        [0.7555, 0.4534]])
tensor([[0.2196, 0.5041],
        [0.7555, 0.4534]], requires_grad=True)


## Moving to GPU

In [25]:
# Report how many CUDA devices PyTorch can see, if any
if not torch.cuda.is_available():
    print("Sorry, CPU only.")
else:
    gpu_count = torch.cuda.device_count()
    print("We have a GPU!" if gpu_count == 1 else f"We have {gpu_count} GPUs!")

Sorry, CPU only.


In [26]:
# A tensor can be placed on the target device as it is created

if not torch.cuda.is_available():
    print("Sorry, CPU only.")
else:
    gpu_rand = torch.rand(2, 2, device="cuda")
    print(gpu_rand)

Sorry, CPU only.


In [27]:
# Pick the device once and reuse the handle, so the same code runs with or
# without a GPU.
my_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", my_device)

x = torch.rand(2, 2, device=my_device)
print(x)

Device: cpu
tensor([[0.2537, 0.5821],
        [0.5127, 0.6171]])


In [30]:
# An existing tensor is moved with .to(); my_device was chosen in the previous cell

y = torch.rand(2, 2)
print(y.device)  # device the tensor was created on

y = y.to(device=my_device)
print(y.device)  # device after the move

cpu
cpu


In [32]:
# All tensors in a computation must be on same device

x = torch.rand(2, 2)

try:
    # Creating the CUDA tensor is inside the try block: on a CPU-only build this
    # line itself fails (with AssertionError), which used to crash the cell.
    y = torch.rand(2, 2, device="cuda")
    z = x + y  # with a GPU present, mixing devices raises RuntimeError
except (RuntimeError, AssertionError) as err:
    # Print the actual exception type so the message is accurate in both cases
    print(f"{type(err).__name__}:", err)


AssertionError: Torch not compiled with CUDA enabled

## Manipulating Tensor Shapes

### Changing Number of Dimensions

In [33]:
# Turning a single (3, 226, 226) tensor into a batch of one

a = torch.rand(3, 226, 226)
b = a.unsqueeze(0)  # insert a new dimension of extent 1 at position 0
print(a.shape, b.shape, sep="\n")

torch.Size([3, 226, 226])
torch.Size([1, 3, 226, 226])


In [35]:
# squeeze(0) drops dimension 0 when its extent is 1...
a = torch.rand(1, 20)
print(a.shape, a, sep="\n")

b = a.squeeze(0)
print(b.shape, b, sep="\n")

# ...and leaves the shape alone otherwise
c = torch.rand(2, 2)
print(c.shape)

d = c.squeeze(0)
print(d.shape)  # no change as can only squeeze() dim of extent 1

torch.Size([1, 20])
tensor([[0.4206, 0.3805, 0.1938, 0.0572, 0.0231, 0.1395, 0.2469, 0.1049, 0.5591,
         0.9609, 0.1817, 0.7692, 0.5956, 0.5949, 0.5735, 0.3678, 0.1782, 0.0720,
         0.8156, 0.4454]])
torch.Size([20])
tensor([0.4206, 0.3805, 0.1938, 0.0572, 0.0231, 0.1395, 0.2469, 0.1049, 0.5591,
        0.9609, 0.1817, 0.7692, 0.5956, 0.5949, 0.5735, 0.3678, 0.1782, 0.0720,
        0.8156, 0.4454])
torch.Size([2, 2])
torch.Size([2, 2])


In [37]:
# unsqueeze() can reshape a tensor so that it broadcasts as intended

a = torch.ones(4, 3, 2)
b = torch.rand(   3)

# As-is, b's single dimension (3) is compared against a's last dimension (2)
try:
    product = a * b
except RuntimeError as err:
    print(err)
else:
    print(product)

c = b.unsqueeze(-1)  # make 2D by adding new dim at the end
print(c.shape)
print(a*c)

The size of tensor a (2) must match the size of tensor b (3) at non-singleton dimension 2
torch.Size([3, 1])
tensor([[[0.4772, 0.4772],
         [0.3319, 0.3319],
         [0.6933, 0.6933]],

        [[0.4772, 0.4772],
         [0.3319, 0.3319],
         [0.6933, 0.6933]],

        [[0.4772, 0.4772],
         [0.3319, 0.3319],
         [0.6933, 0.6933]],

        [[0.4772, 0.4772],
         [0.3319, 0.3319],
         [0.6933, 0.6933]]])


In [38]:
# The underscore variant changes the shape of the tensor it is called on

batch_me = torch.rand(3, 226, 226)
print(batch_me.shape)  # shape as created

batch_me.unsqueeze_(0)
print(batch_me.shape)  # the same object now has a leading dimension of extent 1

torch.Size([3, 226, 226])
torch.Size([1, 3, 226, 226])


In [41]:
# reshape() changes a tensor's shape more radically, e.g. flattening the output
# of the last conv layer before it goes into a linear layer

output3d = torch.rand(6, 20, 20)
input1d = output3d.reshape(6 * 20 * 20)
print(output3d.shape, input1d.shape, sep="\n")

# reshape is also available as a function on the torch module
flat_shape = (6 * 20 * 20,)  # tuple expected when specifying tensor shape
print(torch.reshape(output3d, flat_shape).shape)

torch.Size([6, 20, 20])
torch.Size([2400])
torch.Size([2400])


## NumPy Bridge

In [44]:
import numpy as np

# NumPy array -> tensor: the tensor keeps the array's shape and float64 dtype
np_array = np.ones(shape=(2, 3))
torch_tensor = torch.from_numpy(np_array)

print(np_array, torch_tensor, sep="\n")

[[1. 1. 1.]
 [1. 1. 1.]]
tensor([[1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64)


In [45]:
# Tensor -> NumPy array
torch_rand = torch.rand(2, 3)
np_rand = torch_rand.numpy()

print(torch_rand, np_rand, sep="\n")

tensor([[0.3272, 0.0603, 0.8408],
        [0.9608, 0.8584, 0.4251]])
[[0.32717657 0.06030667 0.8408447 ]
 [0.9607948  0.8584186  0.4251135 ]]


In [47]:
# The converted objects share memory with their sources, so a write on either
# side shows up on the other. All four names come from the two previous cells.

np_array[1][1] = 23    # write through the NumPy array...
torch_rand[1][1] = 17  # ...and through the tensor

print(torch_tensor)    # tensor built from np_array shows 23
print(np_rand)         # array built from torch_rand shows 17

tensor([[ 1.,  1.,  1.],
        [ 1., 23.,  1.]], dtype=torch.float64)
[[ 0.32717657  0.06030667  0.8408447 ]
 [ 0.9607948  17.          0.4251135 ]]
