Pytorch

Dynamic computation graph - key advantage / difference overtensorflow. Very useful for RNNs. Also, enables changing the training batch size and etc is possible during training. 
    Each line of code dynamically adds to a graph.
Implemented in python - so debugging is better



In [5]:
import torch

# requires_grad -> Tell pytorch that this nodes should be updated w/ gradients
x = torch.ones((2,2), requires_grad=True)
y = x + 2
z = 2*y*y
out = z.mean()

#print(out)

#generates gradients d(out)/dx and propogates back to x for update
out.backward()
#gradient of X w/ which the variable would be updated
print(x.grad)


tensor([[3., 3.],
        [3., 3.]])


In [6]:
#1-D tensor. 
a = torch.tensor([2,2,1])
print(a)

tensor([2, 2, 1])


In [7]:
#2-D tensor
b = torch.tensor([[2,1,4],[3,5,6]])
print(b)

tensor([[2, 1, 4],
        [3, 5, 6]])


In [8]:
#size of tensors
print(a.shape)
print(b.shape)
print(a.size())
print(b.size())

torch.Size([3])
torch.Size([2, 3])
torch.Size([3])
torch.Size([2, 3])


In [9]:
#Get height or number of rows of b
print(b.shape[0])

2


In [10]:
#Float tensor: values in the tensors are floats
c = torch.FloatTensor([[2,1,4],[2,5,6]])
#double tensor: values in the tensors are doubles
d = torch.DoubleTensor([2,2,1])
#alternative way: d=torch.tensor([2,2,1], dtype=torch.double). lly for float as well. 

In [11]:
print(c)
print(c.dtype)
print(c.mean())
print(c.std())

tensor([[2., 1., 4.],
        [2., 5., 6.]])
torch.float32
tensor(3.3333)
tensor(1.9664)


In [12]:
print(d)
print(d.dtype)
print(d.mean())
print(d.std())

tensor([2., 2., 1.], dtype=torch.float64)
torch.float64
tensor(1.6667, dtype=torch.float64)
tensor(0.5774, dtype=torch.float64)


Re-shaping tensors - Extremely important

The method to reshape a tensor is "view". Give dimensions to reshape to the view func. If -1 for a dimension, that would be inferred by python.


In [13]:
print(b.view(-1,1))
print(b.view(6))
print(b.reshape(-1,2))

b = b.view(1,-1)
print(b)
print(b.shape)

#Create 3D tensor with 2 channels, 3 rows, 4 columns (channels, rows, columns)
three_dim = torch.rand(2,3,4)
print("\n")
print(three_dim.shape)
print(three_dim)
print(three_dim.view(2,-1))

tensor([[2],
        [1],
        [4],
        [3],
        [5],
        [6]])
tensor([2, 1, 4, 3, 5, 6])
tensor([[2, 1],
        [4, 3],
        [5, 6]])
tensor([[2, 1, 4, 3, 5, 6]])
torch.Size([1, 6])


torch.Size([2, 3, 4])
tensor([[[0.8612, 0.5959, 0.6172, 0.2339],
         [0.9311, 0.5716, 0.8916, 0.3686],
         [0.1235, 0.0397, 0.4659, 0.9374]],

        [[0.5254, 0.7874, 0.9019, 0.1523],
         [0.7008, 0.6153, 0.4355, 0.2603],
         [0.9818, 0.9065, 0.3279, 0.8767]]])
tensor([[0.8612, 0.5959, 0.6172, 0.2339, 0.9311, 0.5716, 0.8916, 0.3686, 0.1235,
         0.0397, 0.4659, 0.9374],
        [0.5254, 0.7874, 0.9019, 0.1523, 0.7008, 0.6153, 0.4355, 0.2603, 0.9818,
         0.9065, 0.3279, 0.8767]])


In [14]:
#Matrix w/ random numbers in range [0,1] (math notation)
r2 =torch.rand(4,4)
#r3 = torch.rand([4,4])
print(r2)
#print(r3)

tensor([[0.4081, 0.1049, 0.7361, 0.2212],
        [0.8706, 0.3780, 0.1446, 0.2851],
        [0.8479, 0.1715, 0.1173, 0.6180],
        [0.5915, 0.8875, 0.5814, 0.6090]])


In [15]:
#Matrix w/ random numbers generated from a normal distribution with mean 0 and variance 1
r4 = torch.randn(4,4)
print(r4)

tensor([[ 0.9817, -0.0725,  0.5160, -0.0769],
        [ 2.7607, -1.5576,  0.9992,  0.3418],
        [ 0.8011,  0.1054,  0.2290, -0.1287],
        [-1.1065,  0.4416, -0.3332,  0.3106]])


In [16]:
#Create an array of 5 integers from values b/w 6 and 9 (below, 10 is exclusive)
#Note: The size must be given as a tuple
int_arr = torch.randint(6,10,(5,))
twod_ir = torch.randint(6,10,(3,4))
print(int_arr)
print(int_arr.dtype)
print(twod_ir)

#Number of elements: len gives out only the rows, while numel gives out all the elements in the array
print(len(twod_ir))
print(torch.numel(twod_ir))

tensor([9, 8, 8, 8, 7])
torch.int64
tensor([[9, 8, 8, 8],
        [6, 7, 6, 7],
        [9, 6, 6, 9]])
3
12


In [17]:
#Matrix of zeros and ones.  Default dtype is float 
z = torch.zeros(3,3,dtype=torch.long)
o = torch.ones(3,3,dtype=torch.float)

print(z, o)

tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]]) tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])


In [18]:
# Generate a random array whose size matches another array r2
r2_like = torch.randn_like(r2, dtype=torch.double)
print(r2_like)

tensor([[-0.8087,  0.5464, -0.3969, -1.2258],
        [ 0.8609,  1.6271, -0.7424, -0.0041],
        [-1.4082,  0.0473, -1.4184,  0.4589],
        [ 0.2762, -0.4963, -1.2571,  0.6095]], dtype=torch.float64)


In [19]:
#Adding two tensors
reg_add = r2 + r2_like
t_add = torch.add(r2,r2_like)

print(reg_add)
print(t_add)
#Element-wise equivalance check. Result will be a tesnor of bools
print(reg_add==t_add)
# Returns bool
print(torch.equal(reg_add, t_add))

tensor([[-0.4005,  0.6513,  0.3392, -1.0046],
        [ 1.7315,  2.0051, -0.5978,  0.2810],
        [-0.5603,  0.2188, -1.3011,  1.0769],
        [ 0.8678,  0.3912, -0.6757,  1.2185]], dtype=torch.float64)
tensor([[-0.4005,  0.6513,  0.3392, -1.0046],
        [ 1.7315,  2.0051, -0.5978,  0.2810],
        [-0.5603,  0.2188, -1.3011,  1.0769],
        [ 0.8678,  0.3912, -0.6757,  1.2185]], dtype=torch.float64)
tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])
True


In [20]:

print(r2.add(r2_like))
print(r2)
#In-place addition. Addition performed and r2 is updated w/ result
print(r2.add_(r2_like))

tensor([[-0.4005,  0.6513,  0.3392, -1.0046],
        [ 1.7315,  2.0051, -0.5978,  0.2810],
        [-0.5603,  0.2188, -1.3011,  1.0769],
        [ 0.8678,  0.3912, -0.6757,  1.2185]], dtype=torch.float64)
tensor([[0.4081, 0.1049, 0.7361, 0.2212],
        [0.8706, 0.3780, 0.1446, 0.2851],
        [0.8479, 0.1715, 0.1173, 0.6180],
        [0.5915, 0.8875, 0.5814, 0.6090]])
tensor([[-0.4005,  0.6513,  0.3392, -1.0046],
        [ 1.7315,  2.0051, -0.5978,  0.2810],
        [-0.5603,  0.2188, -1.3011,  1.0769],
        [ 0.8678,  0.3912, -0.6757,  1.2185]])


In [21]:
#Slicing
print(r2[:,1]) # all rows of column 1
print(r2[:,:2]) # all rows and columns until 2 ( upper bound not inclusive in python)
num_ten = r2[2,3] # Element at 2nd row and 3rd column
print(num_ten)
#numten is still a tensor which means it is a class object. You can get the data out of it as follows. 
print(type(num_ten))
print(num_ten.item(), type(num_ten.item()))


tensor([0.6513, 2.0051, 0.2188, 0.3912])
tensor([[-0.4005,  0.6513],
        [ 1.7315,  2.0051],
        [-0.5603,  0.2188],
        [ 0.8678,  0.3912]])
tensor(1.0769)
<class 'torch.Tensor'>
1.0769110918045044 <class 'float'>


In [22]:
import numpy as np

In [24]:
#Numpy Bridge
a = torch.ones(5)
print(a)
b= a.numpy()
print(b)
#Numpy arrays are AFFECTED by the operations on the torch tensor. In the example below b changes as well due to addition perfromed on a. 
a.add_(1)
print(a)
print(b)


tensor([1., 1., 1., 1., 1.])
[1. 1. 1. 1. 1.]
tensor([2., 2., 2., 2., 2.])
[2. 2. 2. 2. 2.]


In [26]:
#Similar behavior other way around
a = np.ones(5)
b = torch.from_numpy(a)
# add 1 to a and update a w/ the result
np.add(a,1,out=a)
print(a)
print(b)

[2. 2. 2. 2. 2.]
tensor([2., 2., 2., 2., 2.], dtype=torch.float64)


In [29]:
#Move tensor to GPU to speed up calculations. WE DONT HAVE CUDA
#r2 = r2.cuda()
#print(r2)
#If cuda was enables,  the tensor is returned with "device='cuda:0'"
# tesnor([1,2,3], device='cuda:0')

In [31]:
#Provides easy switching to CPU and GPU. The command below returns a boolean which we can use. So need to write separate codes for CPU and GPU
#CUDA = torch.cuda.is_available() 
#if CUDA: 
#   add_result = add_result.cuda()
#   print(add_result)

In [32]:
# Convert list to a tensor. List can be multidimentsional
a= [2,3,4,1]
print(a)
to_list = torch.tensor(a)
print(to_list, to_list.dtype)

[2, 3, 4, 1]
tensor([2, 3, 4, 1]) torch.int64


In [38]:
#Tensor concatenation
f1 = torch.randn(3,5)
print(f1)
s1 =  torch.randn(3,5)
print(s1)

#Concat along rows
c1 = torch.cat([f1,s1])
print(c1)

#Concat along columns
c2 = torch.cat([f1,s1],1)
print(c2)

tensor([[ 1.7970,  0.2046, -0.6591, -0.6871,  0.3704],
        [ 0.5875,  0.1893, -0.9347, -0.6139,  1.0901],
        [ 0.3091,  1.1595, -1.7353,  0.3249,  0.9774]])
tensor([[ 1.0854, -0.7396, -0.7283, -0.3023,  0.3883],
        [-1.6359,  0.6533, -1.1465, -0.9978, -1.2423],
        [-0.0935,  0.4088, -2.6049,  0.2012, -0.1268]])
tensor([[ 1.7970,  0.2046, -0.6591, -0.6871,  0.3704],
        [ 0.5875,  0.1893, -0.9347, -0.6139,  1.0901],
        [ 0.3091,  1.1595, -1.7353,  0.3249,  0.9774],
        [ 1.0854, -0.7396, -0.7283, -0.3023,  0.3883],
        [-1.6359,  0.6533, -1.1465, -0.9978, -1.2423],
        [-0.0935,  0.4088, -2.6049,  0.2012, -0.1268]])
tensor([[ 1.7970,  0.2046, -0.6591, -0.6871,  0.3704,  1.0854, -0.7396, -0.7283,
         -0.3023,  0.3883],
        [ 0.5875,  0.1893, -0.9347, -0.6139,  1.0901, -1.6359,  0.6533, -1.1465,
         -0.9978, -1.2423],
        [ 0.3091,  1.1595, -1.7353,  0.3249,  0.9774, -0.0935,  0.4088, -2.6049,
          0.2012, -0.1268]])


In [50]:
#Adding dimensions to a tensor
tensor_1d = torch.tensor([1,2,3,4])
tensor_2d = torch.unsqueeze(tensor_1d,0) #add another dimension along 0 axis
print(tensor_1d)
print(tensor_2d)
print(tensor_1d.shape)
print(tensor_2d.shape)

tensor_2dc = torch.unsqueeze(tensor_1d,1) #add another dimension along 1 axis
print(tensor_2dc)
print(tensor_2dc.shape)

tensor_3d = torch.unsqueeze(tensor_2dc, 1) 
print(tensor_3d)
print(tensor_3d.shape)

tensor([1, 2, 3, 4])
tensor([[1, 2, 3, 4]])
torch.Size([4])
torch.Size([1, 4])
tensor([[1],
        [2],
        [3],
        [4]])
torch.Size([4, 1])
tensor([[[1]],

        [[2]],

        [[3]],

        [[4]]])
torch.Size([4, 1, 1])


AUTO GRAD in pytorch

In [53]:
#If require_grad = True, the tensor object keeps track of how it was created
x = torch.tensor([1.,2.,3.], requires_grad=True)
y = torch.tensor([4.,5.,6.], requires_grad=True)
z = x + y
print(z)
#z keeps track of how it was created which is as a result of addition og x & y. (it know that it wasn't read-in from a file)
print(z.grad_fn)
#We can go further and the objects still keep track
s = z.sum()
print(s)
print(s.grad_fn)

tensor([5., 7., 9.], grad_fn=<AddBackward0>)
<AddBackward0 object at 0x7fff6c2b88e0>
tensor(21., grad_fn=<SumBackward0>)
<SumBackward0 object at 0x7fff6c2bb6d0>


In [55]:
#Now we can backprop on s, this calculates all the gradients in the dynamic computation graph
s.backward()
#Now if we want ds/dx, we do it as follows
print(x.grad)

tensor([2., 2., 2.])


In [59]:
#By default, requires_grad is False
x = torch.randn(2,2)
y = torch.randn(2,2)
print(x.requires_grad, y.requires_grad)
#z doesn't remember/track how it was created since the underlying tesnors don't have requires_grad=True and we cannot backprop
z = x + y
print(z.grad_fn)

#Now we can set requires_grad on the existing tensors as follows
x.requires_grad_()
y.requires_grad_()
#Now z tracks how it was created as the requires grad is set on i/p
z = x + y
print(z.grad_fn)

#In conclusion, if i/p's have required grad set to True on them, all the variables created using these i/ps will have require grad set to True

# We are detaching z and storing in new tensor. Detach returns a tensor whose data will be same as z but all the computation history and how it was created is forgot
new_z = z.detach()
print(new_z.grad_fn)

#Stop autograd from tracking history in tensors. This is useful in transfer learning. 
# We are just printing the value of requires_grad
print(x.requires_grad)
print((x+10).requires_grad)

# Here we are stopping the tracking with this setting
with torch.no_grad():
    print((x+10).requires_grad)


False False
None
<AddBackward0 object at 0x7fff6c187250>
None
True
True
False


In [62]:
#Causual example of building a dynamic computation graph and backprop-ing the gradients 
x = torch.ones(2,2,requires_grad=True)
print(x)
y = x + 2
print(y)
print(y.grad_fn)
z = y*y*3
out = z.mean()
print(z,out)
out.backward()
print(x.grad)

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)
tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)
<AddBackward0 object at 0x7fff6c219090>
tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward0>) tensor(27., grad_fn=<MeanBackward0>)
tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])
