### Kaggle PyTorch Tutorial

In [1]:
import torch
import numpy as np

### Tensor

Tensors are matrix-like data structures (generalizations of vectors and matrices to any number of dimensions) that are essential components of deep learning libraries and of efficient numerical computation. GPUs are especially effective at performing operations on tensors, and this has spurred the surge in deep learning capability in recent times.

In [2]:
# Allocate a 5x3 tensor WITHOUT initializing its contents: the printed values
# are whatever was already in that memory, so they are arbitrary.
x = torch.empty((5, 3))
print(x)

tensor([[0.0000e+00, 2.0000e+00, 0.0000e+00],
        [2.0000e+00, 8.4037e-10, 7.5553e+28],
        [8.8649e-04, 7.7764e+31, 2.7947e+20],
        [5.1190e+13, 2.6915e+20, 5.0942e-14],
        [1.6109e-19, 1.1578e+27, 7.2902e+22]])


In [3]:
# Convert to NumPy: expose the tensor as a numpy.ndarray.
# For a CPU tensor the array shares memory with the tensor (no copy is made).
x.numpy()

array([[0.00000000e+00, 2.00000000e+00, 0.00000000e+00],
       [2.00000000e+00, 8.40373326e-10, 7.55533019e+28],
       [8.86491616e-04, 7.77639599e+31, 2.79472828e+20],
       [5.11899871e+13, 2.69145827e+20, 5.09422216e-14],
       [1.61085566e-19, 1.15783705e+27, 7.29015776e+22]], dtype=float32)

In [4]:
# Shape of the tensor; .shape is the attribute form of .size() and yields the
# same torch.Size object.
x.shape

torch.Size([5, 3])

In [5]:
# NumPy -> tensor. from_numpy wraps the array instead of copying it, so `a`
# and `b` are two views of the same memory.
a = np.array([[3, 4],
              [4, 3]])
b = torch.from_numpy(a)
print(b)

tensor([[3, 4],
        [4, 3]])


#### Creating Tensors

In [6]:
# Random 5x3 tensor, analogous to numpy.random.rand: values are drawn
# uniformly from [0, 1).
x = torch.rand((5, 3))
print(x)

tensor([[0.7751, 0.0554, 0.1828],
        [0.2882, 0.3740, 0.2789],
        [0.5662, 0.9860, 0.1723],
        [0.8222, 0.2833, 0.4095],
        [0.5183, 0.0963, 0.8674]])


In [7]:
# 5x3 tensor of zeros stored as 64-bit integers (torch.long is an alias of
# torch.int64).
x = torch.zeros((5, 3), dtype=torch.int64)
print(x)

tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])


In [8]:
# 3x3 tensor of ones stored as 64-bit integers (torch.long is an alias of
# torch.int64).
x = torch.ones((3, 3), dtype=torch.int64)
print(x)

tensor([[1, 1, 1],
        [1, 1, 1],
        [1, 1, 1]])


In [9]:
# Build a tensor directly from Python data; the dtype is inferred from the
# values (mixing 2.5 and 7 gives a floating-point tensor).
x = torch.tensor((2.5, 7))
print(x)

tensor([2.5000, 7.0000])


In [10]:
# Derive a new tensor from an existing one: new_ones() reuses the properties
# of `x` unless they are overridden. Here the dtype is overridden to 64-bit
# float (torch.double is an alias of torch.float64) and the shape is 5x3.
x = x.new_ones((5, 3), dtype=torch.float64)
print(x)
print(x.shape)

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64)
torch.Size([5, 3])


In [11]:
# Standard-normal random tensor with the same shape as `x`, but with the dtype
# overridden to 32-bit float (torch.float is an alias of torch.float32).
x = torch.randn_like(x, dtype=torch.float32)
print(x)
print(x.shape)

tensor([[ 0.1543,  1.8748, -0.5941],
        [-0.6886,  0.3577, -0.9396],
        [-0.3782, -1.5011, -0.5467],
        [ 1.6370, -0.9726,  0.7931],
        [-1.2449,  0.5047, -2.5078]])
torch.Size([5, 3])


### Basic Tensor Operations

In [12]:
# Element-wise addition of two random 5x3 tensors.
# The `+` operator and torch.add are two spellings of the same operation
# (neither is outdated), so both prints show identical values.
x, y = torch.rand(5, 3), torch.rand(5, 3)

print(x + y)            # operator form
print(torch.add(x, y))  # functional form

tensor([[0.5850, 1.0052, 1.6812],
        [1.6261, 0.4926, 0.5655],
        [1.6967, 0.2539, 1.0863],
        [1.0678, 1.0594, 1.1202],
        [0.2898, 1.1865, 1.1090]])
tensor([[0.5850, 1.0052, 1.6812],
        [1.6261, 0.4926, 0.5655],
        [1.6967, 0.2539, 1.0863],
        [1.0678, 1.0594, 1.1202],
        [0.2898, 1.1865, 1.1090]])


In [13]:
# Element-wise subtraction of two random 5x3 tensors.
# The `-` operator and torch.sub are two spellings of the same operation
# (neither is outdated), so both prints show identical values.
x, y = torch.rand(5, 3), torch.rand(5, 3)
print(x - y)            # operator form
print(torch.sub(x, y))  # functional form

tensor([[-0.2704, -0.3204,  0.5116],
        [-0.3066, -0.0073, -0.0426],
        [ 0.1317,  0.2188, -0.2124],
        [-0.1301,  0.2635,  0.7938],
        [-0.0628,  0.3015,  0.2644]])
tensor([[-0.2704, -0.3204,  0.5116],
        [-0.3066, -0.0073, -0.0426],
        [ 0.1317,  0.2188, -0.2124],
        [-0.1301,  0.2635,  0.7938],
        [-0.0628,  0.3015,  0.2644]])


In [14]:
# Element-wise (true) division of two random 5x3 tensors.
# The `/` operator and torch.div are two spellings of the same operation
# (neither is outdated), so both prints show identical values.
x, y = torch.rand(5, 3), torch.rand(5, 3)
print(x / y)            # operator form
print(torch.div(x, y))  # functional form

tensor([[0.0350, 4.5726, 5.1997],
        [0.7121, 0.2048, 0.0336],
        [0.7217, 1.1211, 0.4167],
        [3.3045, 0.9530, 3.4150],
        [0.0151, 0.6094, 0.5289]])
tensor([[0.0350, 4.5726, 5.1997],
        [0.7121, 0.2048, 0.0336],
        [0.7217, 1.1211, 0.4167],
        [3.3045, 0.9530, 3.4150],
        [0.0151, 0.6094, 0.5289]])


In [15]:
# Element-wise multiplication (not matrix multiplication) of two random 5x3
# tensors. The `*` operator and torch.mul are two spellings of the same
# operation (neither is outdated), so both prints show identical values.
x, y = torch.rand(5, 3), torch.rand(5, 3)
print(x * y)            # operator form
print(torch.mul(x, y))  # functional form

tensor([[0.2006, 0.2701, 0.4111],
        [0.1883, 0.1811, 0.7472],
        [0.3148, 0.2101, 0.1206],
        [0.0137, 0.0074, 0.0167],
        [0.1956, 0.0914, 0.0073]])
tensor([[0.2006, 0.2701, 0.4111],
        [0.1883, 0.1811, 0.7472],
        [0.3148, 0.2101, 0.1206],
        [0.0137, 0.0074, 0.0167],
        [0.1956, 0.0914, 0.0073]])


In [16]:
# In-place addition: methods with a trailing underscore modify the tensor they
# are called on, so y itself now holds y + x (no new tensor is created).
# Note: x and y come from the previous cell, and re-running this cell adds x
# to y again.
y.add_(x)
print(y)

tensor([[1.1778, 1.0689, 1.2837],
        [0.8929, 0.9068, 1.7451],
        [1.1385, 1.1047, 0.9282],
        [0.5596, 0.5791, 0.9224],
        [1.0352, 0.8908, 0.1892]])


In [17]:
# NumPy-style indexing: every row (:) of column index 1, i.e. the second column
print(x[:, 1])

tensor([0.4097, 0.6097, 0.8605, 0.5661, 0.7724])


In [18]:
# Reshaping with view(): the result reuses x's underlying data (no copy);
# only the shape changes.
x = torch.randn(4, 4)
y = x.view(16)       # flatten into a 1-D tensor of 16 elements
z = x.view(-1, 8)    # -1 is inferred from the other dimension: 16 / 8 = 2
print(x.shape, y.shape, z.shape)

torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])


### Variable

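The main difference between PyTorch and NumPy is that PyTorch provides automatic differentiation, which can automatically give you the gradient of the parameters you want. This operation is provided by another basic element, Variable. (Note: since PyTorch 0.4, Variable has been merged into Tensor — the `variable` object created below prints as an ordinary tensor with `requires_grad=True` — so everything shown here also applies to plain tensors.)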

A Variable wraps a Tensor and supports nearly all of the APIs defined by a Tensor. A Variable also provides a `backward` method to perform backpropagation. For example, to backpropagate a loss function in order to train a model parameter $x$, we use a variable `loss` to store the value computed by the loss function. Then we call `loss.backward()`, which computes the gradient $\frac{\partial\,\text{loss}}{\partial x}$ for all the trainable parameters. PyTorch stores the gradient results back in the corresponding variable $x$ (in its `.grad` attribute).

A Variable in torch is used to build a computational graph, but this graph is dynamic, in contrast to the static graphs of TensorFlow or Theano. So torch does not need placeholders; you can pass a variable directly into the computational graph.

In [19]:
import torch
from torch.autograd import Variable

In [20]:
# Build a plain tensor, plus a gradient-tracking counterpart used to compute
# gradients.
#
# torch.autograd.Variable is deprecated (since PyTorch 0.4 a Tensor can track
# gradients itself), and torch.FloatTensor(...) is the legacy constructor, so
# both are replaced by their current equivalents:
#   * torch.tensor(..., dtype=torch.float32) builds the same float32 tensor;
#   * .detach().requires_grad_(True) yields a tensor that shares storage with
#     `tensor` but is tracked by autograd. `tensor` itself stays untracked.
tensor = torch.tensor([[1, 2], [3, 4]], dtype=torch.float32)
variable = tensor.detach().requires_grad_(True)

print(tensor)
print(variable)

tensor([[1., 2.],
        [3., 4.]])
tensor([[1., 2.],
        [3., 4.]], requires_grad=True)


So far the tensor and the variable look the same. However, the variable is part of the computational graph, so it takes part in automatic gradient computation.

In [21]:
# Mean of the element-wise squares, computed once on the plain tensor and once
# on the gradient-tracking one. Only the second result records a grad_fn,
# which is what later allows backpropagation from it.
t_out = (tensor * tensor).mean()
v_out = (variable * variable).mean()

print(t_out)
print(v_out)

tensor(7.5000)
tensor(7.5000, grad_fn=<MeanBackward1>)


* Backpropagation from v_out
* v_out = 1/4 * sum(variable * variable)
* the gradients with respect to the variable d(v_out)/d(variable) = 1/4 * 2 * variable = variable/2

In [22]:
# Backpropagate from the scalar v_out; autograd stores d(v_out)/d(variable)
# in variable.grad, which per the derivation above equals variable / 2.
v_out.backward()
print(variable.grad)

tensor([[0.5000, 1.0000],
        [1.5000, 2.0000]])


This is data in variable format

In [23]:
# The gradient-tracking object itself: its printed form carries
# requires_grad=True
print(variable)

tensor([[1., 2.],
        [3., 4.]], requires_grad=True)


In [24]:
# The same values as a plain tensor that is not tracked by autograd.
# .detach() is used instead of the legacy .data attribute: both share storage
# with `variable`, but in-place changes made through .data are not seen by
# autograd and can silently lead to wrong gradients.
print(variable.detach())

tensor([[1., 2.],
        [3., 4.]])


In [25]:
# The same values as a NumPy array. A tensor that requires grad has to be
# detached from the autograd graph before conversion; .detach() is the
# supported way to do that (rather than the legacy .data attribute).
print(variable.detach().numpy())

[[1. 2.]
 [3. 4.]]
