In [226]:
# PyTorch Tutorial 60 Minute Blitz
import torch

In [227]:
# Create an uninitialized array, full of random bullshit.
x = torch.empty(5, 3)
x

tensor([[-3.1954e-28,  3.0723e-41,  2.9754e-28],
        [ 3.0723e-41,  1.1210e-43,  0.0000e+00],
        [ 8.9683e-44,  0.0000e+00,  3.1970e-28],
        [ 3.0723e-41,  8.9683e-44,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00]])

In [228]:
# Create a random array 5x3 with values.
# The seed is different each time this command is executed.
x = torch.rand(5,3)
x

tensor([[0.5812, 0.3984, 0.4931],
        [0.8362, 0.9903, 0.6703],
        [0.0138, 0.1097, 0.8951],
        [0.7876, 0.1612, 0.1026],
        [0.5032, 0.9224, 0.8303]])

In [229]:
# Create a tensor filled with zeros.
zs = torch.zeros(5,3,dtype=torch.long)
os = torch.ones (5,3,dtype=torch.double)
print(zs)
print(os)

tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])
tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64)


In [230]:
# Create a tensor filled with explicit data.
x = torch.tensor([5.5000,3.0000])
x

tensor([5.5000, 3.0000])

In [231]:
# Get the size of a tensor.
print(x.size())

torch.Size([2])


In [232]:
# Lifted addition
rs = torch.rand(5,3)
print(rs+os)

tensor([[1.8160, 1.5971, 1.9292],
        [1.7030, 1.4448, 1.3767],
        [1.4316, 1.5978, 1.5195],
        [1.4373, 1.7099, 1.8377],
        [1.3206, 1.3751, 1.8026]], dtype=torch.float64)


In [233]:
# Lifted addition when providing an output tensor as an argument.
result = torch.empty(5,3)
torch.add(rs,os,out=result)
print(result)

tensor([[1.8160, 1.5971, 1.9292],
        [1.7030, 1.4448, 1.3767],
        [1.4316, 1.5978, 1.5195],
        [1.4373, 1.7099, 1.8377],
        [1.3206, 1.3751, 1.8026]])


In [234]:
# Lifted addition in-place.
#   Assignment is by-reference, so it updates the bound object.
bs = os
bs.add_(rs)
print(os)
print(bs)

tensor([[1.8160, 1.5971, 1.9292],
        [1.7030, 1.4448, 1.3767],
        [1.4316, 1.5978, 1.5195],
        [1.4373, 1.7099, 1.8377],
        [1.3206, 1.3751, 1.8026]], dtype=torch.float64)
tensor([[1.8160, 1.5971, 1.9292],
        [1.7030, 1.4448, 1.3767],
        [1.4316, 1.5978, 1.5195],
        [1.4373, 1.7099, 1.8377],
        [1.3206, 1.3751, 1.8026]], dtype=torch.float64)


In [235]:
# Check if CUDA is available.
torch.cuda.is_available()

True

In [236]:
# Create array on CUDA device.
device=torch.device("cuda")
x = torch.ones(3,5, device=device)
y = torch.rand(3,5, device=device)
x = x.to(device)
y = y.to(device)
z = x + y
print(z)

# Copy the array to CPU and also cast its type.
print(z.to("cpu"), torch.double)

tensor([[1.8160, 1.2413, 1.5391, 1.2282, 1.8971],
        [1.1555, 1.1497, 1.8399, 1.1999, 1.0986],
        [1.7422, 1.8401, 1.4053, 1.6501, 1.6021]], device='cuda:0')
tensor([[1.8160, 1.2413, 1.5391, 1.2282, 1.8971],
        [1.1555, 1.1497, 1.8399, 1.1999, 1.0986],
        [1.7422, 1.8401, 1.4053, 1.6501, 1.6021]]) torch.float64


In [238]:
# Create array on second CUDA device.
device=torch.device("cuda:1")
x = torch.ones(3,5, device=device)
y = torch.rand(3,5, device=device)
x = x.to(device)
y = y.to(device)
z = x + y
print(z)

print(z.to("cpu"), torch.double)

tensor([[1.4507, 1.8959, 1.3490, 1.3114, 1.5267],
        [1.1243, 1.5303, 1.1019, 1.6584, 1.9993],
        [1.8611, 1.2774, 1.4553, 1.0312, 1.4290]], device='cuda:1')
tensor([[1.4507, 1.8959, 1.3490, 1.3114, 1.5267],
        [1.1243, 1.5303, 1.1019, 1.6584, 1.9993],
        [1.8611, 1.2774, 1.4553, 1.0312, 1.4290]]) torch.float64


In [239]:
# Automatic Differentiation

In [240]:
# Adding requires_grad attaches a gradient buffer to the tensor.
# The gradient buffers start out empty.
a = torch.tensor([1.0, 2.0], requires_grad=True)
w = torch.tensor([3.0, 4.0], requires_grad=True)
print(a.grad)
print(w.grad)

None
None


In [241]:
# When we perform tensor operations, the grad_fn field tracks how the result was built.
y = torch.dot(a,w)
print(y)

tensor(11., grad_fn=<DotBackward>)


In [242]:
# Calling backward() on a scalar result pushes a sensitivity of 1
# back through the network, and frees the forward buffers along the way.
y.backward()
print(a.grad)
print(w.grad)

tensor([3., 4.])
tensor([1., 2.])


In [243]:
# backward() is an effectful operation that frees the forward buffers,
# so we can't call it again without re-running the forward pass.
# The following will fail.
#   y.backward()

In [244]:
# If we do multiple backwards passes on different subtrees
# we can accumulate multiple updates into the weights.
a = torch.tensor([1.0, 2.0], requires_grad=False)
w = torch.tensor([3.0, 4.0], requires_grad=True)
y = torch.dot(a,w)
z = torch.dot(a,w)
y.backward()
z.backward()
print(a.grad)
print(w.grad)

None
tensor([2., 4.])


In [245]:
# Get a tensor without the gradient information, using detach()
ad = a.detach()
print(ad)

tensor([1., 2.])
