### 1. Tensors


In [1]:
import torch
import numpy as np

# torch.empty(size): allocates a tensor WITHOUT initializing its memory, so the
# values shown are whatever happened to be in that memory (often, but not
# necessarily, zeros).
one_element = torch.empty(1)  # 1-D tensor holding a single element
print("empty(1):", one_element)
vector = torch.empty(3)  # 1-D tensor (vector)
print("empty(3):", vector)
matrix = torch.empty(2, 3)  # 2-D tensor (matrix)
print("empty(2,3):", matrix)
cube = torch.empty(2, 2, 3)  # 3-D tensor; torch.empty(2, 2, 2, 3) would be 4-D
print("empty(2, 2, 3):", cube)

# torch.rand(size): uniform random numbers from the half-open interval [0, 1)
uniform = torch.rand(5, 3)
print("rand(5,3):", uniform)

# torch.zeros(size) fills with 0; torch.ones(size) fills with 1.
# This last tensor stays bound to `x` because the following cell inspects it.
x = torch.zeros(5, 3)
print("zeros(5,3):", x)

empty(1): tensor([0.])
empty(3): tensor([0., 0., 0.])
empty(2,3): tensor([[0., 0., 0.],
        [0., 0., 0.]])
empty(2, 2, 3): tensor([[[0., 0., 0.],
         [0., 0., 0.]],

        [[0., 0., 0.],
         [0., 0., 0.]]])
rand(5,3): tensor([[0.9018, 0.5376, 0.7767],
        [0.7877, 0.1048, 0.6222],
        [0.4207, 0.7711, 0.2913],
        [0.5557, 0.2076, 0.3767],
        [0.7614, 0.7399, 0.5322]])
zeros(5,3): tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])


In [2]:
# Two equivalent ways to inspect a tensor's dimensions:
# the .size() method and the .shape attribute both give a torch.Size.
size_from_method = x.size()
size_from_attr = x.shape
print("Size : ", size_from_method)
print("Shape : ", size_from_attr)

Size :  torch.Size([5, 3])
Shape :  torch.Size([5, 3])


In [3]:
# Every tensor carries a dtype; the tensor from torch.zeros above is float32
default_dtype = x.dtype
print(default_dtype)

# Specify the dtype explicitly at construction time (float32 is the default)
x = torch.zeros(5, 3, dtype=torch.float16)
print(x)

# Confirm the dtype of the newly created tensor
print(x.dtype)

torch.float32
tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]], dtype=torch.float16)
torch.float16


In [4]:
# Construct a tensor directly from Python data;
# a list of Python ints yields an int64 tensor.
values = [5, 5, 3]
x = torch.tensor(values)
print(x, x.dtype)

tensor([5, 5, 3]) torch.int64


In [5]:
# The requires_grad argument
# Setting it to True tells PyTorch that gradients will have to be calculated
# for this tensor later, during the optimization steps,
# i.e. the tensor is a variable of the model that you want to optimize.

initial_values = [5.5, 3]
x = torch.tensor(initial_values, requires_grad=True)
print(x)


tensor([5.5000, 3.0000], requires_grad=True)


In [6]:
# Operations
x = torch.ones(2, 2)
y = torch.rand(2, 2)

print(x)
print(y)

# Every arithmetic operator below is elementwise and has an equivalent
# function form (torch.add, torch.sub, torch.mul, torch.div) that returns a
# new tensor. The function form is only useful if its result is kept, so it is
# named in a comment next to the operator instead of being called and discarded.
#
# In-place addition: every method with a trailing underscore is an in-place
# operation, i.e. it modifies the tensor it is called on.
# Example: y.add_(x) would overwrite y with y + x.

z = x + y  # same as torch.add(x, y)
print("Addition : ", z)

z = x - y  # same as torch.sub(x, y)
print("Subtraction : ", z)

z = x * y  # same as torch.mul(x, y)
print("Multiplication : ", z)

z = x / y  # same as torch.div(x, y)
print("Division : ", z)


tensor([[1., 1.],
        [1., 1.]])
tensor([[0.0934, 0.2557],
        [0.9618, 0.3293]])
Addition :  tensor([[1.0934, 1.2557],
        [1.9618, 1.3293]])
Subtraction :  tensor([[0.9066, 0.7443],
        [0.0382, 0.6707]])
Multiplication :  tensor([[0.0934, 0.2557],
        [0.9618, 0.3293]])
Division :  tensor([[10.7089,  3.9112],
        [ 1.0397,  3.0364]])


In [7]:
# Slicing
x = torch.rand(5, 3)
print(x)

first_column = x[:, 0]  # every row, column 0
second_row = x[1, :]    # row 1, every column
element = x[1, 1]       # a single entry; still a (0-dim) tensor
print("X[: , 0] ", first_column)
print("X[1, :] ", second_row)
print("X[1, 1] ", element)

# .item() extracts the value of a one-element tensor as a plain Python number
print("x[1, 1].item() ", element.item())

tensor([[0.7268, 0.2558, 0.9466],
        [0.9341, 0.2437, 0.9370],
        [0.4573, 0.3338, 0.4509],
        [0.9987, 0.3440, 0.1433],
        [0.2874, 0.0312, 0.7243]])
X[: , 0]  tensor([0.7268, 0.9341, 0.4573, 0.9987, 0.2874])
X[1, :]  tensor([0.9341, 0.2437, 0.9370])
X[1, 1]  tensor(0.2437)
x[1, 1].item()  0.2436610460281372


In [8]:
# Reshape with Tensor.view()
x = torch.randn(4, 4)
y = x.view(16)     # the same 16 elements seen as shape (16,)
z = x.view(-1, 8)  # -1: PyTorch infers this dimension from the total element count

print(*(t.size() for t in (x, y, z)))

torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])


### NumPy

#### Converting a Torch tensor to a NumPy array and vice versa is very easy.


In [9]:
import torch

# Torch -> NumPy: Tensor.numpy() returns the tensor's data as a numpy.ndarray
a = torch.ones(5)
print(a)

b = a.numpy()
print(b, type(b), sep="\n")


tensor([1., 1., 1., 1., 1.])
[1. 1. 1. 1. 1.]
<class 'numpy.ndarray'>


In [10]:
# NumPy -> Torch: torch.from_numpy(a) wraps the array's memory,
# whereas torch.tensor(a) makes an independent copy.
a = np.ones(5)
b = torch.from_numpy(a)
c = torch.tensor(a)
for obj in (a, b, c):
    print(obj)

# Careful when modifying: the in-place update of `a` below is also visible
# through `b` (shared memory) but not through `c` (copy).
a += 1
for obj in (a, b, c):
    print(obj)

[1. 1. 1. 1. 1.]
tensor([1., 1., 1., 1., 1.], dtype=torch.float64)
tensor([1., 1., 1., 1., 1.], dtype=torch.float64)
[2. 2. 2. 2. 2.]
tensor([2., 2., 2., 2., 2.], dtype=torch.float64)
tensor([1., 1., 1., 1., 1.], dtype=torch.float64)


### GPU Support

#### By default, tensors are created on the CPU. But we can also move them to the GPU (if it's available), or create them directly on the GPU.


In [11]:
# Pick the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Option 1: create the tensor first, then move it to the chosen device
x = torch.rand(2, 2).to(device)

# Option 2: create the tensor directly on the chosen device
x = torch.rand(2, 2, device=device)

### 2. Autograd

The autograd package provides automatic differentiation for all operations on Tensors. Generally speaking, torch.autograd is an engine for computing the vector-Jacobian product. It computes partial derivatives while applying the chain rule.


In [12]:
# requires_grad=True --> all operations on the tensor are tracked.
x = torch.randn(3, requires_grad=True)
print(x)

# y is the result of an operation on x, so it carries a grad_fn attribute:
# a reference to the Function that created the tensor.
y = x + 2
print(y)
print(y.grad_fn)

tensor([-0.5618, -0.6133, -0.0451], requires_grad=True)
tensor([1.4382, 1.3867, 1.9549], grad_fn=<AddBackward0>)
<AddBackward0 object at 0x00000295CC173A90>


In [13]:
# Apply more operations to y: square and scale each element, then average
per_element = y * y * 3
print(per_element)
z = per_element.mean()  # reduce to a single scalar value
print(z)

tensor([ 6.2052,  5.7690, 11.4645], grad_fn=<MulBackward0>)
tensor(7.8129, grad_fn=<MeanBackward0>)


In [14]:
# Computing the gradients with backpropagation:
# once the computation is finished we can call .backward() and have all the
# gradients computed automatically. The gradient for a tensor is accumulated
# into its .grad attribute; it is the partial derivative of the function
# w.r.t. that tensor.

per_element = y * y * 3
print(per_element)
z = per_element.mean()  # reduce to a single scalar value
print(z)


# ! ! ! Careful ! ! ! backward() ACCUMULATES the gradient into the .grad attribute.
# ! ! ! We need to be careful during optimization ! ! ! optimizer.zero_grad()

tensor([ 6.2052,  5.7690, 11.4645], grad_fn=<MulBackward0>)
tensor(7.8129, grad_fn=<MeanBackward0>)


In [15]:
# Backpropagate from the scalar z; the gradient dz/dx is accumulated into x.grad.
# With z = mean(3 * y * y) and y = x + 2 over 3 elements, dz/dx_i = 2 * y_i.
z.backward()
print(x.grad)

tensor([2.8764, 2.7734, 3.9097])


## Stop a tensor from tracking history:

For example, during the training loop when we want to update our weights, or after training during evaluation. These operations should not be part of the gradient computation. To prevent this, we can use:

- x.requires_grad_(False)
- x.detach()
- wrap the code in a `with torch.no_grad():` block


In [None]:
# .requires_grad_(...) changes an existing tensor's requires_grad flag in-place.
a = torch.randn(2, 2)
print(a.requires_grad)  # a plain tensor does not track gradients
b = (a * a).sum()
print(b.grad_fn)        # nothing was tracked, so there is no grad_fn

a.requires_grad_(True)
print(a.requires_grad)
b = (a * a).sum()       # recomputed now that `a` is tracked
print(b.grad_fn)

tensor([[0.9073, 0.9877],
        [1.0416, 0.9997]])
tensor([[0.8233, 0.9756],
        [1.0849, 0.9994]])
tensor(3.8831)
False
None
True
<SumBackward0 object at 0x00000295CC173A90>


In [21]:
# .detach(): get a new tensor with the same content but no gradient computation.
a = torch.randn(2, 2, requires_grad=True)
b = a.detach()
print(a.requires_grad, b.requires_grad, sep="\n")

True
False
