In [2]:
import torch
import math

### Create a tensor & its datatype

Convert the datatype using .to()

torch.bool

torch.int8

torch.uint8

torch.int16

torch.int32

torch.int64

torch.half

torch.float

torch.double

torch.bfloat


In [16]:
#random-looking values when printing your tensor. The torch.empty() call allocates memory for the tensor, but does not initialize it with any values - so what you’re seeing is whatever was in memory at the time of allocation.
x = torch.empty(2,3) # default datatype is float32
print(type(x))
print(x)

zeros = torch.zeros(2,3) # default datatype is float32
print(zeros)

ones = torch.ones(2,3) # default datatype is float32
print(ones)

torch.manual_seed(42) # assurance of the reproducibility of your results.
random = torch.rand(2,3)
print(random)

tenInt16 = torch.ones(2,3, dtype=torch.int16)
print(tenInt16)

tenF16 = torch.ones((2,3), dtype=torch.float16)
print(tenF16)

tenF64 = tenF16.double()
print(tenF64)
 
tenF64_2 = tenF16.to(torch.float64)
print(tenF64_2) 

<class 'torch.Tensor'>
tensor([[1.6629e-35, 0.0000e+00, 1.5408e-35],
        [0.0000e+00, 1.0000e+00, 1.0000e+00]])
tensor([[0., 0., 0.],
        [0., 0., 0.]])
tensor([[1., 1., 1.],
        [1., 1., 1.]])
tensor([[0.8823, 0.9150, 0.3829],
        [0.9593, 0.3904, 0.6009]])
tensor([[1, 1, 1],
        [1, 1, 1]], dtype=torch.int16)
tensor([[1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float16)
tensor([[1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64)
tensor([[1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64)


### Tensor shapes

In [9]:
x = torch.empty(2,3)
print(x)

empty_tensor_like_x = torch.empty_like(x)
print(empty_tensor_like_x.shape)
print(empty_tensor_like_x)

zeros_like_x = torch.zeros_like(x)
print(zeros_like_x.shape)
print(zeros_like_x)

onees_like_x = torch.ones_like(x)
print(onees_like_x.shape)
print(onees_like_x)

rand_like_x = torch.rand_like(x)
print(rand_like_x.shape)
print(rand_like_x)

tensor([[1.8261e-35, 0.0000e+00, 3.3739e+13],
        [4.5629e-41, 8.9683e-44, 0.0000e+00]])
torch.Size([2, 3])
tensor([[1.5165e-35, 0.0000e+00, 1.6996e-35],
        [0.0000e+00, 8.9683e-44, 0.0000e+00]])
torch.Size([2, 3])
tensor([[0., 0., 0.],
        [0., 0., 0.]])
torch.Size([2, 3])
tensor([[1., 1., 1.],
        [1., 1., 1.]])
torch.Size([2, 3])
tensor([[0.2566, 0.7936, 0.9408],
        [0.1332, 0.9346, 0.5936]])


### create a tensor by its content

In [11]:
ten1 = torch.tensor([[1,2,3],[4,5,6]])
print(ten1)

ten2 = torch.tensor((5,6,7,8))
print(ten2)

ten3 = torch.tensor(((1,2,3),[4,5,6]))
print(ten3)



tensor([[1, 2, 3],
        [4, 5, 6]])
tensor([5, 6, 7, 8])
tensor([[1, 2, 3],
        [4, 5, 6]])


### Math with pytorch

In [17]:
ones = torch.zeros(2, 2) + 1
twos = torch.ones(2, 2) * 2
threes = (torch.ones(2, 2) * 7 - 1) / 2
fours = twos ** 2
sqrt2s = twos ** 0.5

print(ones)
print(twos)
print(threes)
print(fours)
print(sqrt2s)


tensor([[1., 1.],
        [1., 1.]])
tensor([[2., 2.],
        [2., 2.]])
tensor([[3., 3.],
        [3., 3.]])
tensor([[4., 4.],
        [4., 4.]])
tensor([[1.4142, 1.4142],
        [1.4142, 1.4142]])


In [18]:
powers2 = twos ** torch.tensor([[1, 2], [3, 4]])
print(powers2)

fives = ones + fours
print(fives)

dozens = threes * fours # this is not a matmul
print(dozens)

tensor([[ 2.,  4.],
        [ 8., 16.]])
tensor([[5., 5.],
        [5., 5.]])
tensor([[12., 12.],
        [12., 12.]])


### Tensor broadcast

How is it we got to multiply a 2x4 tensor by a 1x4 tensor?

Broadcasting is a way to perform an operation between tensors that have similarities in their shapes. In the example above, the one-row, four-column tensor is multiplied by both rows of the two-row, four-column tensor.

This is an important operation in Deep Learning. The common example is multiplying a tensor of learning weights by a batch of input tensors, applying the operation to each instance in the batch separately, and returning a tensor of identical shape - just like our (2, 4) * (1, 4) example above returned a tensor of shape (2, 4).

The rules for broadcasting are:

Each tensor must have at least one dimension - no empty tensors.

Comparing the dimension sizes of the two tensors, going from last to first:

Each dimension must be equal, or

One of the dimensions must be of size 1, or

The dimension does not exist in one of the tensors

Tensors of identical shape, of course, are trivially “broadcastable”, as you saw earlier.

In [24]:
rand = torch.rand(2, 4)
doubled = rand * (torch.ones(1, 4) * 2)

print(rand)
print(doubled)

a =     torch.ones(4, 3, 2)
mm = torch.rand(   3, 2) # 3rd & 2nd dims identical to a, dim 1 absent
print(mm)
b = a * mm
print(b)

mm = torch.rand(   3, 1) # 3rd dim = 1, 2nd dim identical to a
print(mm)
c = a * mm
print(c)

d = a * torch.rand(   1, 2) # 3rd dim identical to a, 2nd dim = 1
print(d)


tensor([[0.6666, 0.9811, 0.0874, 0.0041],
        [0.1088, 0.1637, 0.7025, 0.6790]])
tensor([[1.3333, 1.9623, 0.1747, 0.0081],
        [0.2176, 0.3273, 1.4050, 1.3581]])
tensor([[0.9155, 0.2418],
        [0.1591, 0.7653],
        [0.2979, 0.8035]])
tensor([[[0.9155, 0.2418],
         [0.1591, 0.7653],
         [0.2979, 0.8035]],

        [[0.9155, 0.2418],
         [0.1591, 0.7653],
         [0.2979, 0.8035]],

        [[0.9155, 0.2418],
         [0.1591, 0.7653],
         [0.2979, 0.8035]],

        [[0.9155, 0.2418],
         [0.1591, 0.7653],
         [0.2979, 0.8035]]])
tensor([[0.3813],
        [0.7860],
        [0.1115]])
tensor([[[0.3813, 0.3813],
         [0.7860, 0.7860],
         [0.1115, 0.1115]],

        [[0.3813, 0.3813],
         [0.7860, 0.7860],
         [0.1115, 0.1115]],

        [[0.3813, 0.3813],
         [0.7860, 0.7860],
         [0.1115, 0.1115]],

        [[0.3813, 0.3813],
         [0.7860, 0.7860],
         [0.1115, 0.1115]]])
tensor([[[0.2477, 0.6524],
     

In [26]:
#The following cell throws a run-time error. This is intentional.

a =     torch.ones(4, 3, 2)

#b = a * torch.rand(4, 3)    # dimensions must match last-to-first
#c = a * torch.rand(   2, 3) # both 3rd & 2nd dims different
#d = a * torch.rand((0, ))   # can't broadcast with an empty tensor

### Math operation

more operations from torch  

In [27]:
# common functions
a = torch.rand(2, 4) * 2 - 1
print('Common functions:')
print(torch.abs(a))
print(torch.ceil(a))
print(torch.floor(a))
print(torch.clamp(a, -0.5, 0.5))

# trigonometric functions and their inverses
angles = torch.tensor([0, math.pi / 4, math.pi / 2, 3 * math.pi / 4])
sines = torch.sin(angles)
inverses = torch.asin(sines)
print('\nSine and arcsine:')
print(angles)
print(sines)
print(inverses)

# bitwise operations
print('\nBitwise XOR:')
b = torch.tensor([1, 5, 11])
c = torch.tensor([2, 7, 10])
print(torch.bitwise_xor(b, c))

# comparisons:
print('\nBroadcasted, element-wise equality comparison:')
d = torch.tensor([[1., 2.], [3., 4.]])
e = torch.ones(1, 2)  # many comparison ops support broadcasting!
print(torch.eq(d, e)) # returns a tensor of type bool

# reductions:
print('\nReduction ops:')
print(torch.max(d))        # returns a single-element tensor
print(torch.max(d).item()) # extracts the value from the returned tensor
print(torch.mean(d))       # average
print(torch.std(d))        # standard deviation
print(torch.prod(d))       # product of all numbers
print(torch.unique(torch.tensor([1, 2, 1, 2, 1, 2]))) # filter unique elements

# vector and linear algebra operations
v1 = torch.tensor([1., 0., 0.])         # x unit vector
v2 = torch.tensor([0., 1., 0.])         # y unit vector
m1 = torch.rand(2, 2)                   # random matrix
m2 = torch.tensor([[3., 0.], [0., 3.]]) # three times identity matrix

print('\nVectors & Matrices:')
print(torch.cross(v2, v1)) # negative of z unit vector (v1 x v2 == -v2 x v1)
print(m1)
m3 = torch.matmul(m1, m2)
print(m3)                  # 3 times m1
print(torch.svd(m3))       # singular value decomposition

Common functions:
tensor([[0.1320, 0.7259, 0.0235, 0.6831],
        [0.8484, 0.5507, 0.8752, 0.6367]])
tensor([[-0., -0., 1., -0.],
        [-0., -0., -0., -0.]])
tensor([[-1., -1.,  0., -1.],
        [-1., -1., -1., -1.]])
tensor([[-0.1320, -0.5000,  0.0235, -0.5000],
        [-0.5000, -0.5000, -0.5000, -0.5000]])

Sine and arcsine:
tensor([0.0000, 0.7854, 1.5708, 2.3562])
tensor([0.0000, 0.7071, 1.0000, 0.7071])
tensor([0.0000, 0.7854, 1.5708, 0.7854])

Bitwise XOR:
tensor([3, 2, 1])

Broadcasted, element-wise equality comparison:
tensor([[ True, False],
        [False, False]])

Reduction ops:
tensor(4.)
4.0
tensor(2.5000)
tensor(1.2910)
tensor(24.)
tensor([1, 2])

Vectors & Matrices:
tensor([ 0.,  0., -1.])
tensor([[0.9998, 0.5944],
        [0.6541, 0.0337]])
tensor([[2.9994, 1.7833],
        [1.9622, 0.1010]])
torch.return_types.svd(
U=tensor([[-0.8847, -0.4661],
        [-0.4661,  0.8847]]),
S=tensor([3.9208, 0.8152]),
V=tensor([[-0.9101,  0.4144],
        [-0.4144, -0.9101]]))


### in-place math operations

In [31]:
a = torch.tensor([0, math.pi / 4, math.pi / 2, 3 * math.pi / 4])
print('a:')
print(a)
print(torch.sin(a))   # this operation creates a new tensor in memory
print(a)              # a has not changed

b = torch.tensor([0, math.pi / 4, math.pi / 2, 3 * math.pi / 4])
print('\nb:')
print(b)
print(torch.sin_(b))  # note the underscore
print(b)              # b has changed

a = torch.ones(2, 2)
b = torch.rand(2, 2)

print('\n\n ---Before:')
print(a)
print(b)
print('\nAfter adding:')
print(a.add_(b))
print(a)
print(b)
print('\nAfter multiplying')
print(b.mul_(b))
print(b)


a:
tensor([0.0000, 0.7854, 1.5708, 2.3562])
tensor([0.0000, 0.7071, 1.0000, 0.7071])
tensor([0.0000, 0.7854, 1.5708, 2.3562])

b:
tensor([0.0000, 0.7854, 1.5708, 2.3562])
tensor([0.0000, 0.7071, 1.0000, 0.7071])
tensor([0.0000, 0.7071, 1.0000, 0.7071])


 ---Before:
tensor([[1., 1.],
        [1., 1.]])
tensor([[0.4654, 0.1612],
        [0.1568, 0.2083]])

After adding:
tensor([[1.4654, 1.1612],
        [1.1568, 1.2083]])
tensor([[1.4654, 1.1612],
        [1.1568, 1.2083]])
tensor([[0.4654, 0.1612],
        [0.1568, 0.2083]])

After multiplying
tensor([[0.2166, 0.0260],
        [0.0246, 0.0434]])
tensor([[0.2166, 0.0260],
        [0.0246, 0.0434]])


There is another option for placing the result of a computation in an existing, allocated tensor. Many of the methods and functions we’ve seen so far - including creation methods! - have an out argument that lets you specify a tensor to receive the output. If the out tensor is the correct shape and dtype, this can happen without a new memory allocation:

In [34]:
a = torch.rand(2, 2)
b = torch.rand(2, 2)
c = torch.zeros(2, 2)
old_id = id(c) # object's memory address

print(old_id)

print(c)
d = torch.matmul(a, b, out=c)
print(c)                # contents of c have changed

assert c is d           # test c & d are same object, not just containing equal values
assert id(c) == old_id  # make sure that our new c is the same object as the old one

torch.rand(2, 2, out=c) # works for creation too!
print(c)                # c has changed again
assert id(c) == old_id  # still the same object!

139849393002112
tensor([[0., 0.],
        [0., 0.]])
tensor([[0.8166, 0.2758],
        [0.5218, 0.1442]])
tensor([[0.6625, 0.2297],
        [0.9545, 0.6099]])


### copying tensors
Assigning a tensor to a variable makes the variable a label of the tensor, and does not copy it.

In [36]:
a = torch.ones(2, 2)
b = a # just a reference to a, not a copy

a[0][1] = 561  # we change a...
print(b)       # ...and b is also altered


a = torch.ones(2, 2)
b = a.clone() # make a copy

assert b is not a      # different objects in memory...
print(torch.eq(a, b))  # ...but still with the same contents!

a[0][1] = 561          # a changes...
print(b)               # ...but b is still all ones



tensor([[  1., 561.],
        [  1.,   1.]])
tensor([[True, True],
        [True, True]])
tensor([[1., 1.],
        [1., 1.]])


to detach autograd from the tensor, use the detach to just copy the data for more efficient data:

In [37]:
a = torch.rand(2, 2, requires_grad=True) # turn on autograd
print(a)

b = a.clone()
print(b)

c = a.detach().clone()
print(c) # no gradient tracking

print(a)

tensor([[0.5643, 0.0594],
        [0.7099, 0.4250]], requires_grad=True)
tensor([[0.5643, 0.0594],
        [0.7099, 0.4250]], grad_fn=<CloneBackward0>)
tensor([[0.5643, 0.0594],
        [0.7099, 0.4250]])
tensor([[0.5643, 0.0594],
        [0.7099, 0.4250]], requires_grad=True)


### GPU

In [38]:
if torch.cuda.is_available():
    print('We have a GPU!')
else:
    print('Sorry, CPU only.')

We have a GPU!


In [39]:
if torch.cuda.is_available():
    gpu_rand = torch.rand(2, 2, device='cuda')
    print(gpu_rand)
else:
    print('Sorry, CPU only.')

tensor([[0.6130, 0.0101],
        [0.3984, 0.0403]], device='cuda:0')


If we want to develop a code that is robust (regardless of the CPU/GPU), we can create a handle, and define tesnsors based on the GPU. 

If you have an existing tensor living on one device, you can move it to another with the to() method. The following line of code creates a tensor on CPU, and moves it to whichever device handle you acquired in the previous cell.



In [40]:
if torch.cuda.is_available():
    my_device = torch.device('cuda')
else:
    my_device = torch.device('cpu')
print('Device: {}'.format(my_device))

x = torch.rand(2, 2, device=my_device)
print(x)


y = torch.rand(2, 2)
y = y.to(my_device) # transfer to GPU

Device: cuda
tensor([[0.9877, 0.1289],
        [0.5621, 0.5221]], device='cuda:0')


in order to do computation involving two or more tensors, all of the tensors must be on the same device. The following code will throw a runtime error, regardless of whether you have a GPU device available:

In [45]:
x = torch.rand(2, 2)
y = torch.rand(2, 2, device='cuda')
#z = x + y  # exception will be thrown

### Manipulating Tensor Shapes

 imagine having a model that works on 3 x 226 x 226 images - a 226-pixel square with 3 color channels. When you load and transform it, you’ll get a tensor of shape (3, 226, 226). Your model, though, is expecting input of shape (N, 3, 226, 226), where N is the number of images in the batch. So how do you make a batch of one?

In [4]:
a = torch.rand(3, 226, 226)
b = a.unsqueeze(0) # The unsqueeze() method adds a dimension of extent 1. unsqueeze(0) adds it as a new zeroth dimension - now you have a batch of one!

print(a.shape)
print(b.shape)

c = a.unsqueeze(1)
print(c.shape) 

torch.Size([3, 226, 226])
torch.Size([1, 3, 226, 226])
torch.Size([3, 1, 226, 226])


you’ll see that printing a shows an “extra” set of square brackets [] due to having an extra dimension.

You may only squeeze() dimensions of extent 1. See above where we try to squeeze a dimension of size 2 in c, and get back the same shape we started with. Calls to squeeze() and unsqueeze() can only act on dimensions of extent 1 because to do otherwise would change the number of elements in the tensor.

In [5]:
c = torch.rand(1, 1, 1, 1, 1)
print(c)

a = torch.rand(1, 20)
print(a.shape)
print(a)

b = a.squeeze(0) # removes the first dimension, 
print(b.shape)
print(b) # see the difference in the shape and contents of a and b

c = torch.rand(2, 2)
print(c.shape)

d = c.squeeze(0)
print(d.shape)

tensor([[[[[0.7197]]]]])
torch.Size([1, 20])
tensor([[0.8354, 0.4693, 0.8818, 0.8335, 0.2370, 0.9629, 0.3358, 0.8241, 0.1101,
         0.8760, 0.7264, 0.4484, 0.7953, 0.4162, 0.7104, 0.5623, 0.3949, 0.9325,
         0.9623, 0.3244]])
torch.Size([20])
tensor([0.8354, 0.4693, 0.8818, 0.8335, 0.2370, 0.9629, 0.3358, 0.8241, 0.1101,
        0.8760, 0.7264, 0.4484, 0.7953, 0.4162, 0.7104, 0.5623, 0.3949, 0.9325,
        0.9623, 0.3244])
torch.Size([2, 2])
torch.Size([2, 2])


In [7]:
a = torch.ones(4, 3, 2)

c = a * torch.rand(   3, 1) # 3rd dim = 1, 2nd dim identical to a
print(c)

a = torch.ones(4, 3, 2)
b = torch.rand(   3)     # trying to multiply a * b will give a runtime error
c = b.unsqueeze(1)       # change to a 2-dimensional tensor, adding new dim at the end
print(c.shape)
print(a * c)             # broadcasting works again!

batch_me = torch.rand(3, 226, 226)
print(batch_me.shape)
batch_me.unsqueeze_(0)
print(batch_me.shape)

output3d = torch.rand(6, 20, 20)
print(output3d.shape)

input1d = output3d.reshape(6 * 20 * 20)
print(input1d.shape)

# can also call it as a method on the torch module:
print(torch.reshape(output3d, (6 * 20 * 20,)).shape)

tensor([[[0.3232, 0.3232],
         [0.5999, 0.5999],
         [0.0262, 0.0262]],

        [[0.3232, 0.3232],
         [0.5999, 0.5999],
         [0.0262, 0.0262]],

        [[0.3232, 0.3232],
         [0.5999, 0.5999],
         [0.0262, 0.0262]],

        [[0.3232, 0.3232],
         [0.5999, 0.5999],
         [0.0262, 0.0262]]])
torch.Size([3, 1])
tensor([[[0.9205, 0.9205],
         [0.9551, 0.9551],
         [0.4157, 0.4157]],

        [[0.9205, 0.9205],
         [0.9551, 0.9551],
         [0.4157, 0.4157]],

        [[0.9205, 0.9205],
         [0.9551, 0.9551],
         [0.4157, 0.4157]],

        [[0.9205, 0.9205],
         [0.9551, 0.9551],
         [0.4157, 0.4157]]])
torch.Size([3, 226, 226])
torch.Size([1, 3, 226, 226])
torch.Size([6, 20, 20])
torch.Size([2400])
torch.Size([2400])


### Numpy and pytorch

In [9]:
import numpy as np

numpy_array = np.ones((2, 3))
print(numpy_array)

pytorch_tensor = torch.from_numpy(numpy_array)
print(pytorch_tensor)

pytorch_rand = torch.rand(2, 3)
print(pytorch_rand)

numpy_rand = pytorch_rand.numpy()
print(numpy_rand)

numpy_array[1, 1] = 23
print(pytorch_tensor)

pytorch_rand[1, 1] = 17
print(numpy_rand)


[[1. 1. 1.]
 [1. 1. 1.]]
tensor([[1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64)
tensor([[0.6468, 0.4002, 0.9548],
        [0.9986, 0.0734, 0.9361]])
[[0.6468483  0.40015334 0.9547675 ]
 [0.9986462  0.07335663 0.93613154]]
tensor([[ 1.,  1.,  1.],
        [ 1., 23.,  1.]], dtype=torch.float64)
[[ 0.6468483   0.40015334  0.9547675 ]
 [ 0.9986462  17.          0.93613154]]
