In [21]:
import numpy as np
import torch
from torch import nn
from torch import optim
import torchvision
from torchvision import datasets
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

# Manipulate the device and the precision

The goal is to understand PyTorch's basic data structure, the `Tensor`: a multi-dimensional array whose elements all share a single data type (its `dtype`) and which lives on a given device (CPU or GPU).

In [22]:
# Run on the GPU when PyTorch reports a usable CUDA device; otherwise stay on the CPU.
with_cuda = torch.cuda.is_available()
device = torch.device("cuda" if with_cuda else "cpu")

In [29]:
# 1) Sample straight on the target device, using the default dtype.
x = torch.randn(10, device=device)
print(x)

# 2) Same, but ask for half precision (float16) at creation time.
x = torch.randn(10, dtype=torch.float16, device=device)
print(x)

# 3) `.to(device)` applied to a tensor that was already created on `device`.
x = torch.randn(5, device=device).to(device)
print(x)

# 4) Create with the defaults first, then change device and precision in one `.to` call.
x = torch.randn(5).to(dtype=torch.float16, device=device)
print(x)



tensor([-0.1967, -1.3958,  0.3857,  0.0059,  0.6488, -0.1323,  1.8120, -0.8132,
         0.2035,  1.8036])
tensor([ 1.9082, -0.8696, -0.0415, -0.0483,  2.0312, -0.8687, -0.4263,  0.1675,
        -0.7954, -0.9126], dtype=torch.float16)
tensor([ 0.7332, -1.3008,  0.9171, -0.9777,  0.3466])
tensor([ 0.9038, -0.0241,  0.7373, -0.3384,  1.2129], dtype=torch.float16)


# Backpropagation of the gradient

## Parameters and tensors

**Question 1**

Build some `torch.nn.Parameter`, some `torch.Tensor` and some `torch.Tensor` with `requires_grad = True`.

In [34]:
# Parameters: the printed flags below show they track gradients without being asked to.
x1 = torch.nn.Parameter(torch.randn(5))            # 1-D, five entries
x2 = torch.nn.Parameter(torch.randn(1).squeeze())  # squeezed down to a 0-dim scalar

# Plain tensors: no gradient tracking was requested.
a = torch.rand(5)
b = torch.rand(1).squeeze()

# Plain tensor that explicitly opts in to gradient tracking.
c = torch.randn(3, 4, requires_grad=True)

# Show each object; for the first three also show the requires_grad flag explicitly.
print(x1, "requires_grad:", x1.requires_grad)
print(x2, "requires_grad:", x2.requires_grad)
print(a, "requires_grad:", a.requires_grad)
print(b, c, sep="\n")


Parameter containing:
tensor([ 0.1957,  1.4508, -0.9671,  0.1087,  2.3250], requires_grad=True) requires_grad: True
Parameter containing:
tensor(-0.8408, requires_grad=True) requires_grad: True
tensor([0.8455, 0.2452, 0.9908, 0.5523, 0.8587]) requires_grad: False
tensor(0.8857)
tensor([[-0.6282,  0.5380,  1.2627, -0.9518],
        [-1.0271,  0.6482, -1.2423, -0.3486],
        [-1.0635,  0.4053,  0.3507, -0.6813]], requires_grad=True)


## Computing gradients

**Question 2**

Let $f(x_1, x_2) = x_2 \sin\left(\sum_i a_i x_{1,i} + b\right)$ be the function defined below (its value is stored in `y`). Compute its gradient with respect to `x1` and `x2` by using `backward`, and then by using `torch.autograd.grad`.

In [None]:
# Fresh parameters for this experiment: their .grad is still empty at this point.
x1 = torch.nn.Parameter(torch.randn(5))
x2 = torch.nn.Parameter(torch.randn(1).squeeze())
# Constant coefficients, created as plain tensors.
a = torch.rand(5)
b = torch.rand(1).squeeze()

# y = x2 * sin(<a, x1> + b)
y = x2 * torch.sin(torch.sum(a * x1) + b)

print(x1.grad, x2.grad)  # nothing has been backpropagated yet
y.backward()             # populates x1.grad and x2.grad
print(x1.grad, x2.grad)
# Root node of the autograd graph, then the nodes it feeds gradients to.
print(y.grad_fn, y.grad_fn.next_functions, sep="\n")

None None
tensor([0.1504, 0.0669, 0.1562, 0.1780, 0.2217]) tensor(0.8756)
<MulBackward0 object at 0x000001EC4D985510>
((<AccumulateGrad object at 0x000001EC7C465990>, 0), (<SinBackward0 object at 0x000001EC7C466200>, 0))


In [None]:
x1 = torch.nn.Parameter(torch.randn(5))
x2 = torch.nn.Parameter(torch.randn(1).squeeze())
a = torch.rand(5)
b = torch.rand(1).squeeze()

y = x2 * torch.sin((a * x1).sum() + b)