# Torch basics

In [1]:
import matplotlib.pyplot as plt
%matplotlib inline
import torch
import numpy as np

In [2]:
torch.__version__

'1.8.1+cu101'

Largely inspired by the tutorial [What is PyTorch?](https://pytorch.org/tutorials/beginner/former_torchies/tensor_tutorial.html)

Tensors are used to encode the signal to process, but also the internal states and parameters of models.

Manipulating data through this constrained structure makes it possible to use CPUs and GPUs at peak performance.



## Tensors

Construct a 3x5 matrix, uninitialized:

In [3]:
# Set the default floating point dtype to float32.
# This dtype is used as the default floating point type for type inference in
# torch.tensor() and by factory functions called without an explicit dtype.
# torch.set_default_dtype is the supported API for this;
# torch.set_default_tensor_type is deprecated in recent PyTorch releases.
torch.set_default_dtype(torch.float32)

In [4]:
# Allocate a 3x5 tensor without initialising its memory: the values printed
# below are arbitrary.
x = torch.empty((3, 5))
print(x.type(), x, sep="\n")

torch.FloatTensor
tensor([[-2.6384e+00,  3.0787e-41,  3.3631e-44,  0.0000e+00,         nan],
        [ 3.0787e-41,  1.1578e+27,  1.1362e+30,  7.1547e+22,  4.5828e+30],
        [ 1.2121e+04,  7.1846e+22,  9.2198e-39,  7.0374e+22, -8.1007e-01]])


In [5]:
x = torch.randn(3,5)
print(x)

tensor([[-0.3620, -0.3776, -1.1011, -0.3137,  0.0259],
        [ 0.0785, -0.8459, -0.5939,  2.6388,  1.5043],
        [-0.3496, -0.8013, -0.3702, -0.5995, -1.4226]])


In [6]:
print(x.size())

torch.Size([3, 5])


torch.Size is in fact a [tuple](https://docs.python.org/3/tutorial/datastructures.html#tuples-and-sequences), so it supports the same operations.

In [7]:
x.size()[1]

5

In [8]:
x.size() == (3,5)

True

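Importance of the brackets when defining a tensor: `torch.tensor([2.0])` is a 1-D tensor containing one element (size `[1]`), whereas `torch.tensor(2.0)` is a 0-D scalar tensor (size `[]`).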

In [9]:
# With brackets: a 1-D tensor holding a single element
a = torch.tensor([2.0])
print(type(a), a.dtype, a.size(), sep="\n")

# Without brackets: a 0-D (scalar) tensor
b = torch.tensor(2.0)
print(type(b), b.dtype, b.size(), sep="\n")

<class 'torch.Tensor'>
torch.float32
torch.Size([1])
<class 'torch.Tensor'>
torch.float32
torch.Size([])


Select some columns

In [10]:
# Boolean mask with one entry per column of x; nothing is selected at first
cols = torch.zeros(5, dtype=torch.bool)
print(cols)
cols[1] = True
cols[4] = True
print(cols)
c = x[:, cols]  # selects all rows, 2nd and 5th columns (indices 1 and 4) of x
print(c)

tensor([False, False, False, False, False])
tensor([False,  True, False, False,  True])
tensor([[-0.3776,  0.0259],
        [-0.8459,  1.5043],
        [-0.8013, -1.4226]])


All tensor operations that work in-place have a `_` suffix.

In [11]:
# x will be filled with the value 3.5
x.fill_(3.5)
print(x)

tensor([[3.5000, 3.5000, 3.5000, 3.5000, 3.5000],
        [3.5000, 3.5000, 3.5000, 3.5000, 3.5000],
        [3.5000, 3.5000, 3.5000, 3.5000, 3.5000]])


## Bridge to numpy

In [12]:
y = x.numpy()
print(y)

[[3.5 3.5 3.5 3.5 3.5]
 [3.5 3.5 3.5 3.5 3.5]
 [3.5 3.5 3.5 3.5 3.5]]


In [13]:
a = np.ones(5)
b = torch.from_numpy(a)
print(b)
 

tensor([1., 1., 1., 1., 1.], dtype=torch.float64)


In [14]:
xr = torch.randn(3, 5)
# Integer numpy array: from_numpy keeps the integer dtype of the array
a = np.ones(5, dtype=int)
b = torch.from_numpy(a)
print(xr, b, sep="\n")

tensor([[-2.1421,  0.1627,  1.5620,  0.0801, -0.6368],
        [ 0.3609,  0.6428, -0.4246, -1.6004,  0.5782],
        [ 1.8494,  2.0860, -0.0298, -0.0685,  1.3256]])
tensor([1, 1, 1, 1, 1])


### Question: print the type of the content (data) of variables a, b and xr

In [15]:
# COMPLETE THE CELL
# ...

## Operations

There are multiple syntaxes for operations. In the following
example, we will take a look at the addition operation.

Addition: syntax 1

In [16]:
x = torch.rand(5, 3) 
y = torch.rand(5, 3)
print(x + y)

tensor([[1.4688, 0.8230, 1.2771],
        [1.4428, 1.0737, 0.8907],
        [1.3165, 0.4991, 1.1901],
        [0.9536, 0.4484, 1.3300],
        [1.4318, 0.5955, 1.7426]])


Addition: syntax 2

In [17]:
print(torch.add(x, y))

tensor([[1.4688, 0.8230, 1.2771],
        [1.4428, 1.0737, 0.8907],
        [1.3165, 0.4991, 1.1901],
        [0.9536, 0.4484, 1.3300],
        [1.4318, 0.5955, 1.7426]])


Addition: providing an output tensor as argument

In [18]:
result = torch.empty(5, 3)
torch.add(x, y, out=result)
print(result)

tensor([[1.4688, 0.8230, 1.2771],
        [1.4428, 1.0737, 0.8907],
        [1.3165, 0.4991, 1.1901],
        [0.9536, 0.4484, 1.3300],
        [1.4318, 0.5955, 1.7426]])


Addition: in-place

In [19]:
# adds x to y
y.add_(x)
print(y)

tensor([[1.4688, 0.8230, 1.2771],
        [1.4428, 1.0737, 0.8907],
        [1.3165, 0.4991, 1.1901],
        [0.9536, 0.4484, 1.3300],
        [1.4318, 0.5955, 1.7426]])


**Note:** Any operation that mutates a tensor in-place is post-fixed with an ``_``.
    For example: ``x.copy_(y)``, ``x.t_()``, will change ``x``.



Any operation that mutates a tensor in-place is post-fixed with an `_`.

For example, `x.copy_(y)` and `x.t_()` will change `x`, whereas `x.t()` returns a transposed tensor and leaves `x` unchanged (compare the next three cells).

In [20]:
print(x.t())

tensor([[0.9679, 0.6875, 0.4824, 0.1079, 0.5122],
        [0.3806, 0.9131, 0.0937, 0.0280, 0.1351],
        [0.7914, 0.3779, 0.4297, 0.9885, 0.8692]])


In [21]:
print(x)

tensor([[0.9679, 0.3806, 0.7914],
        [0.6875, 0.9131, 0.3779],
        [0.4824, 0.0937, 0.4297],
        [0.1079, 0.0280, 0.9885],
        [0.5122, 0.1351, 0.8692]])


In [22]:
x.t_()
print(x)

tensor([[0.9679, 0.6875, 0.4824, 0.1079, 0.5122],
        [0.3806, 0.9131, 0.0937, 0.0280, 0.1351],
        [0.7914, 0.3779, 0.4297, 0.9885, 0.8692]])


You can use standard NumPy-like indexing with all bells and whistles!

In [23]:
print(x[:, 1])

tensor([0.6875, 0.9131, 0.3779])


Resizing (very useful): if you want to resize/reshape a tensor, you can use ``Tensor.view`` (called as ``x.view(...)``):

In [24]:
x = torch.randn(4, 4)
# Flatten the 16 elements into a single dimension
y = x.view(16)
# A size of -1 is inferred from the other dimensions (here 16 / 8 = 2)
z = x.view(-1, 8)
print(*(t.size() for t in (x, y, z)))

torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])


If you have a one element tensor, use ``.item()`` to get the value as a
Python number

In [25]:
x = torch.randn(1)
print(x)
print(x.item())

tensor([0.4263])
0.42633482813835144


**Read later:**


  100+ Tensor operations, including transposing, indexing, slicing,
  mathematical operations, linear algebra, random numbers, etc.,
  are described
  [here](https://pytorch.org/docs/torch).

## 3D Tensors

### Question: What is the size of the following tensor?

In [26]:
y = torch.tensor([
     [
       [1, 2, 3],
       [4, 5, 6]
     ],
     [
       [1, 2, 3],
       [4, 5, 6]
     ],
     [
       [1, 2, 3],
       [4, 5, 6]
     ]
   ])
print(y)

tensor([[[1, 2, 3],
         [4, 5, 6]],

        [[1, 2, 3],
         [4, 5, 6]],

        [[1, 2, 3],
         [4, 5, 6]]])


In [27]:
# COMPLETE THE CELL
# ...

### Question: Explain the result of the next cell

In [28]:
torch.sum(y, dim=0)

tensor([[ 3,  6,  9],
        [12, 15, 18]])

## Broadcasting semantics

In short, if a PyTorch operation supports broadcast, then its Tensor arguments can be automatically expanded to be of equal sizes (without making copies of the data).

Two tensors are “broadcastable” if the following rules hold:

*   Each tensor has at least one dimension.
*   When iterating over the dimension sizes, **starting at the trailing dimension**, the dimension sizes must either be equal, one of them is 1, or one of them does not exist.

If two tensors x, y are “broadcastable”, the resulting tensor size is calculated as follows:
* If the number of dimensions of x and y are not equal, prepend 1 to the dimensions of the tensor with fewer dimensions to make them equal length.
* Then, for each dimension size, the resulting dimension size is the max of the sizes of x and y along that dimension.

More details [here](https://pytorch.org/docs/stable/notes/broadcasting.html)



In [29]:
# can line up trailing dimensions to make reading easier
x=torch.empty(5,1,4,1)
y=torch.empty(  3,1,1)
print((x+y).size())



torch.Size([5, 3, 4, 1])


In [30]:
# but not necessary:
x=torch.empty(1)
y=torch.empty(3,1,7)
print((x+y).size())



torch.Size([3, 1, 7])


### Question: The following command does not work. Why?



In [31]:
# The addition below is expected to fail: the error is the point of the
# question. It is caught and printed so that the rest of the notebook still
# runs with "Restart & Run All".
x=torch.empty(5,2,4,1)
y=torch.empty(  3,1,1)
try:
    print((x+y).size())
except RuntimeError as err:
    print("RuntimeError:", err)


RuntimeError: ignored

In [None]:
# x has size (2, 4) and is broadcast along a new leading dimension of y
x = torch.full((2, 4), 2.)
y = torch.ones((3, 2, 4))
print(x + y)

### Question: What is the difference between "x = xr" and "x = xr.clone()"? 

In [None]:
x = xr.clone()
x.add_(-xr)
print(x)
print(xr)

Also be careful: a torch tensor and the NumPy array obtained from it with `.numpy()` share the same memory, so modifying one in-place also modifies the other, and vice versa.

In [None]:
y=torch.ones(2,4)
print(y)
z = y.numpy()
print(z)

In [None]:
# z was obtained with y.numpy(): update each of them in-place in turn and
# print both after every update.
z += 1
print("z=", z)
print("y=", y,"\n")
y.add_(-4)
print("z=",z)
print("y=",y)

## Computational graphs

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
import torch

In [None]:
!pip install torchviz

In [None]:
import torchviz

In [None]:
# Two leaf tensors that track gradients
x = torch.ones((2, 2), requires_grad=True)
w = torch.rand((1, 1), requires_grad=True)
# y is built from w and x, so it is attached to the computational graph
y = w * x + 2
print(x, w, y, sep="\n")
# Last expression of the cell: the graph leading to y is displayed
torchviz.make_dot(y)


## Playing with pytorch: linear regression

Code for plotting the surface

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.mplot3d import Axes3D

def plot_figs(fig_num, elev, azim, x, y, weights, bias):
    """Draw the data points and the plane defined by (weights, bias) in 3D.

    Args:
        fig_num: number of the matplotlib figure to draw into.
        elev: elevation angle of the 3D view.
        azim: azimuth angle of the 3D view.
        x: array indexed as ``x[:, 0]`` and ``x[:, 1]`` (the two input features).
        y: values plotted on the vertical axis for the rows of ``x``.
        weights: weights of the plane, combined with the corner coordinates
            through ``np.dot`` (presumably of shape (2, 1) or (2,) — confirm
            against the caller).
        bias: offset added to the plane.
    """
    fig = plt.figure(fig_num, figsize=(4, 3))
    plt.clf()
    # Create the 3D axes through the figure: instantiating Axes3D(fig, ...)
    # directly no longer attaches the axes to the figure in current Matplotlib
    # releases, which leaves the plot empty.
    ax = fig.add_subplot(111, projection='3d')
    ax.view_init(elev=elev, azim=azim)
    ax.scatter(x[:, 0], x[:, 1], y)
    # The plane is drawn over the four corners of the unit square
    corners = np.array([[0, 0, 1, 1],
                        [0, 1, 0, 1]]).T
    plane = (np.dot(corners, weights) + bias).reshape((2, 2))
    ax.plot_surface(np.array([[0, 0], [1, 1]]),
                    np.array([[0, 1], [0, 1]]),
                    plane,
                    alpha=.5)
    ax.set_xlabel('x_1')
    ax.set_ylabel('x_2')
    ax.set_zlabel('y')
    
def plot_views(x, y, w, b):
    """Show the points (x, y) and the plane (w, b[0]) from one fixed viewpoint."""
    # Elevation / azimuth of the single view that is rendered
    view_elev, view_azim = 43.5, -110
    plot_figs(1, view_elev, view_azim, x, y, w, b[0])
    plt.show()

Code for generating the 2D points

In [None]:
# Data generation (2D points)
# Parameters of the linear model used to generate the data
w_source = torch.tensor([[2.], [-3.]], dtype=torch.float)
b_source = torch.tensor([1.], dtype=torch.float)

# Input of the regression model: 30 points drawn uniformly between 0 and 1
x = torch.empty(30, 2).uniform_(0, 1)

print(x.shape, w_source.shape, b_source.shape, sep="\n")

# Output of the regression model
y = x @ w_source + b_source
print(y.shape)

Plot the dataset

In [None]:
plot_views(x.numpy(), y.numpy(), w_source.numpy(), b_source.numpy())

In [None]:
# Random starting point for the learnable weights and bias,
# drawn uniformly between -1 and 1
w_t_init = torch.empty((2, 1)).uniform_(-1, 1)
b_t_init = torch.empty((1, 1)).uniform_(-1, 1)

print("Initial values of the parameters:", w_t_init, b_t_init, sep="\n")



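### Question: calculate the gradient of the loss and code it.

The loss implemented below is $L(w, b) = \sum_i (x_i^\top w + b - y_i)^2$; compute $\partial L/\partial w$ and $\partial L/\partial b$.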

In [None]:
# Forward pass of the linear model
def forward_t(x):
    """Return ``x @ w_t + b_t`` using the global parameters ``w_t`` and ``b_t``."""
    prediction = torch.mm(x, w_t) + b_t
    return prediction

# Sum-of-squares loss
def loss_t(x, y):
    """Return the sum of squared differences between ``forward_t(x)`` and ``y``."""
    residual = forward_t(x) - y
    return residual.pow(2).sum()

# compute gradient
def gradient_t(x, y):  # d_loss/d_w, d_loss/d_b
    """Exercise stub: gradient of the loss with respect to the parameters.

    The training loop unpacks the result as ``grad_w, grad_b``, so the
    completed function is expected to return those two values (presumably
    with the shapes of ``w_t`` and ``b_t`` — to be confirmed when completing
    the exercise). As written it only contains ``pass`` and returns None.
    """
    # COMPLETE THE CELL FROM HERE (remove "pass")
    # ...
    pass


Main loop for computing the estimate (gradient descent)

### Question: code the gradient descent algorithm within the main loop.

In [None]:
# Step size of the gradient descent update
learning_rate = 1e-2

# Start from copies of the initial values so that w_t_init / b_t_init are
# not modified by the training loop
w_t = w_t_init.clone()
b_t = b_t_init.clone()

for epoch in range(10):
    # Loss for the current parameters (forward_t reads the globals w_t and b_t)
    l = loss_t(x,y)
    # Requires gradient_t to be completed: the stub returns None, which
    # cannot be unpacked into two values
    grad_w, grad_b = gradient_t(x,y)
    # COMPLETE THE CELL FROM HERE
    # ...
    print("progress:", "epoch:", epoch, "loss",l)

In [None]:
# After training
print("Estimation of the parameters:")
print(w_t)
print(b_t)

### Question: Test a higher learning rate (e.g., learning_rate = 1). Explain what you observe.

### Question: How to improve the quality of the estimate?