In [2]:
%matplotlib inline

In [3]:
import torch
import numpy as np

In [4]:
data = [[1, 2],[3, 4]]
x_data = torch.tensor(data)
print(x_data)

tensor([[1, 2],
        [3, 4]])


**From a NumPy array**

In [5]:
np_array = np.array(data)
x_np = torch.from_numpy(np_array)
print(x_np)

tensor([[1, 2],
        [3, 4]])


In [6]:
x_data.dtype

torch.int64

**From another tensor:**

The new tensor retains the properties (shape, datatype) of the argument tensor, unless explicitly overridden.

In [7]:
x_ones = torch.ones_like(x_data) # retains the properties of x_data
print(f"Ones Tensor: \n {x_ones} \n")
print(f"Shape: {x_ones.shape}")
print(f"Datatype: {x_ones.dtype}")

x_rand = torch.rand_like(x_data, dtype=torch.float) # overrides the datatype of x_data
print(f"Random Tensor: \n {x_rand} \n")
print(f"Shape: {x_rand.shape}")
print(f"Datatype: {x_rand.dtype}")

Ones Tensor: 
 tensor([[1, 1],
        [1, 1]]) 

Shape: torch.Size([2, 2])
Datatype: torch.int64
Random Tensor: 
 tensor([[0.0198, 0.8736],
        [0.4010, 0.1167]]) 

Shape: torch.Size([2, 2])
Datatype: torch.float32


**With random or constant values:**

``shape`` is a tuple of tensor dimensions. In the functions below, it determines the dimensionality of the output tensor.



In [8]:
shape = (2,3)
rand_tensor = torch.rand(shape)
ones_tensor = torch.ones(shape)
zeros_tensor = torch.zeros(shape)



print(f"Random Tensor: \n {rand_tensor} \n")
print(f"Ones Tensor: \n {ones_tensor} \n")
print(f"Zeros Tensor: \n {zeros_tensor}")

Random Tensor: 
 tensor([[0.1507, 0.0038, 0.5435],
        [0.4573, 0.1021, 0.0184]]) 

Ones Tensor: 
 tensor([[1., 1., 1.],
        [1., 1., 1.]]) 

Zeros Tensor: 
 tensor([[0., 0., 0.],
        [0., 0., 0.]])


#### Attributes of a Tensor


In [9]:
tensor = torch.rand(3,4)

print(f"Shape of tensor: {tensor.shape}")
print(f"Datatype of tensor: {tensor.dtype}")
print(f"Device tensor is stored on: {tensor.device}")

n = tensor.numpy()
print(f"numpy: {n}")

Shape of tensor: torch.Size([3, 4])
Datatype of tensor: torch.float32
Device tensor is stored on: cpu
numpy: [[0.12863088 0.28437078 0.508887   0.00617874]
 [0.11320645 0.6388707  0.5424011  0.93210065]
 [0.09244335 0.47709614 0.02761596 0.67227226]]


#### Operations on Tensors
~~~~~~~~~~~~~~~~~

By default, tensors are created on the CPU. We need to explicitly move tensors to the GPU using
``.to`` method (after checking for GPU availability). Keep in mind that copying large tensors
across devices can be expensive in terms of time and memory!



In [10]:
# We move our tensor to the GPU if available
if torch.cuda.is_available():
    tensor = tensor.to("cuda")

In [11]:
print(f"Device tensor is stored on: {tensor.device}")

Device tensor is stored on: cuda:0


**Standard numpy-like indexing and slicing:**



In [12]:
tensor = torch.ones(4, 4)
print(f"First row: {tensor[0]}")
print(f"First column: {tensor[:, 0]}")
print(f"Last column: {tensor[..., -1]}")
tensor[:,1] = 0
print(tensor)

First row: tensor([1., 1., 1., 1.])
First column: tensor([1., 1., 1., 1.])
Last column: tensor([1., 1., 1., 1.])
tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])


stack: Concatenates sequence of tensors along a new dimension.

cat: Concatenates sequence of tensors along  the given dimension.

In [13]:
t1 = torch.cat([tensor, tensor, tensor], dim=1)
print(t1.shape)
t1
# (d1, d2, ..., dn)->(d1, 3*d2, ..., dn)

torch.Size([4, 12])


tensor([[1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.]])

In [14]:
t2 = torch.stack([tensor, tensor, tensor], dim=1)
print(t2.shape)
t2
# (d1, d2, ..., dn)->(d1, 3, d2, ..., dn)

torch.Size([4, 3, 4])


tensor([[[1., 0., 1., 1.],
         [1., 0., 1., 1.],
         [1., 0., 1., 1.]],

        [[1., 0., 1., 1.],
         [1., 0., 1., 1.],
         [1., 0., 1., 1.]],

        [[1., 0., 1., 1.],
         [1., 0., 1., 1.],
         [1., 0., 1., 1.]],

        [[1., 0., 1., 1.],
         [1., 0., 1., 1.],
         [1., 0., 1., 1.]]])

**Arithmetic operations**



In [15]:
import time
tensor = torch.tensor([[1,2,3], [1,2,3]])
tensor = torch.rand(8192*2, 8192*2).to("cuda")
print(tensor)
# This computes the element-wise addition.
begin_1 = time.perf_counter()
z1 = tensor + tensor
end_1 = time.perf_counter()
print(end_1-begin_1)
begin_2 = time.perf_counter()
z2 = tensor.add(tensor)
end_2 = time.perf_counter()
print(end_2-begin_2)
begin_3 = time.perf_counter()
z3 = torch.add(tensor, tensor)
end_3 = time.perf_counter()
print(end_3-begin_3)
print(z1)
print(z2)
print(z3)


# This computes the element-wise product.
begin_4 = time.perf_counter()
z1 = tensor * tensor
end_4 = time.perf_counter()
print(end_4-begin_4)
begin_5 = time.perf_counter()
z2 = tensor.mul(tensor)
end_5 = time.perf_counter()
print(end_5-begin_5)
begin_6 = time.perf_counter()
z3 = torch.mul(tensor, tensor)
end_6 = time.perf_counter()
print(end_6-begin_6)
print(z1)
print(z2)
print(z3)
#tensor * scalar  tensor.mul(scalar) torch.mul(tensor, scalar)

tensor([[0.0990, 0.6141, 0.7851,  ..., 0.2695, 0.6367, 0.7237],
        [0.1292, 0.2030, 0.4658,  ..., 0.7093, 0.4272, 0.0343],
        [0.6592, 0.0796, 0.9920,  ..., 0.9543, 0.0247, 0.5575],
        ...,
        [0.9674, 0.6204, 0.7153,  ..., 0.2719, 0.7288, 0.3797],
        [0.6176, 0.4246, 0.7363,  ..., 0.4042, 0.9919, 0.7624],
        [0.8900, 0.5922, 0.6244,  ..., 0.7449, 0.1976, 0.0030]],
       device='cuda:0')
0.02245839999523014
0.00360319996252656
0.0016382000176236033
tensor([[0.1980, 1.2281, 1.5702,  ..., 0.5390, 1.2734, 1.4475],
        [0.2585, 0.4059, 0.9315,  ..., 1.4187, 0.8544, 0.0686],
        [1.3184, 0.1592, 1.9841,  ..., 1.9087, 0.0494, 1.1151],
        ...,
        [1.9347, 1.2407, 1.4307,  ..., 0.5437, 1.4575, 0.7593],
        [1.2352, 0.8491, 1.4726,  ..., 0.8084, 1.9838, 1.5248],
        [1.7801, 1.1843, 1.2488,  ..., 1.4898, 0.3953, 0.0061]],
       device='cuda:0')
tensor([[0.1980, 1.2281, 1.5702,  ..., 0.5390, 1.2734, 1.4475],
        [0.2585, 0.4059, 0.931

In [16]:
tensor = torch.tensor([[1,2,3], [1,2,3]])
tensor = torch.rand(8192*4, 8192*4).to("cuda")
# This computes the matrix multiplication between two tensors.
begin_7 = time.perf_counter()
y1 = tensor @ tensor.T
end_7 = time.perf_counter()
print(end_7-begin_7)
begin_8 = time.perf_counter()
y2 = tensor.matmul(tensor.T)
end_8 = time.perf_counter()
print(end_8-begin_8)
begin_9 = time.perf_counter()
y3 = torch.matmul(tensor, tensor.T)
end_9 = time.perf_counter()
print(end_9-begin_9)
begin_10 = time.perf_counter()
y4 = torch.mm(tensor, tensor.T)
end_10 = time.perf_counter()
print(end_10-begin_10)
print(y1)
print(y2)
print(y3)
print(y4)

0.11485539993736893
2.9041872000088915
0.004579100059345365
0.017843300011008978
tensor([[11000.6094,  8158.0933,  8165.8862,  ...,  8204.7305,  8217.9697,
          8246.9697],
        [ 8158.0933, 10813.4033,  8097.5659,  ...,  8140.2598,  8182.1450,
          8152.4229],
        [ 8165.8862,  8097.5659, 10844.0205,  ...,  8144.2036,  8196.2178,
          8208.1670],
        ...,
        [ 8204.7305,  8140.2598,  8144.2036,  ..., 10905.6338,  8195.4648,
          8220.3730],
        [ 8217.9697,  8182.1450,  8196.2178,  ...,  8195.4648, 10951.3008,
          8214.2314],
        [ 8246.9697,  8152.4229,  8208.1670,  ...,  8220.3730,  8214.2314,
         10987.4551]], device='cuda:0')
tensor([[11000.6094,  8158.0933,  8165.8862,  ...,  8204.7305,  8217.9697,
          8246.9697],
        [ 8158.0933, 10813.4033,  8097.5659,  ...,  8140.2598,  8182.1450,
          8152.4229],
        [ 8165.8862,  8097.5659, 10844.0205,  ...,  8144.2036,  8196.2178,
          8208.1670],
        ...,
  

The 'out' argument can be used to put the result into a exising tensor.

In [17]:
print(f"{tensor} \n")
torch.add(tensor, 5, out=tensor)
print(tensor)

tensor([[0.4264, 0.0070, 0.1733,  ..., 0.9625, 0.1178, 0.7814],
        [0.2959, 0.3511, 0.9888,  ..., 0.6785, 0.0268, 0.2571],
        [0.6725, 0.1353, 0.7236,  ..., 0.6991, 0.9251, 0.2334],
        ...,
        [0.7741, 0.7595, 0.3189,  ..., 0.2400, 0.3351, 0.4530],
        [0.1178, 0.8468, 0.7940,  ..., 0.4842, 0.9984, 0.5704],
        [0.1927, 0.3054, 0.7184,  ..., 0.1116, 0.4114, 0.3085]],
       device='cuda:0') 

tensor([[5.4264, 5.0070, 5.1733,  ..., 5.9625, 5.1178, 5.7814],
        [5.2959, 5.3511, 5.9888,  ..., 5.6785, 5.0268, 5.2571],
        [5.6725, 5.1353, 5.7236,  ..., 5.6991, 5.9251, 5.2334],
        ...,
        [5.7741, 5.7595, 5.3189,  ..., 5.2400, 5.3351, 5.4530],
        [5.1178, 5.8468, 5.7940,  ..., 5.4842, 5.9984, 5.5704],
        [5.1927, 5.3054, 5.7184,  ..., 5.1116, 5.4114, 5.3085]],
       device='cuda:0')


Use item() to get the raw value from a pytorch tensor.



In [None]:
begin_11 = time.perf_counter()
agg = tensor.sum()
end_11 = time.perf_counter()
print(end_11-begin_11)
print(agg)
torch.sum(tensor, dim=1)
agg_item = agg.item()
print(agg_item, type(agg_item))

0.0001778000732883811
tensor(5.9056e+09, device='cuda:0')
5905594368.0 <class 'float'>


### Automatic Differentiation
To compute those gradients, PyTorch has a built-in differentiation engine called torch.autograd. It supports automatic computation of gradient for any computational graph.

In [None]:
import torch

x = torch.tensor([1.])  # input tensor
y = torch.tensor([2.])  # expected output
w = torch.tensor([1.], requires_grad=True)
b = torch.tensor([0.], requires_grad=True)
z = x*w+b
#loss = torch.square(z - y).sum() 
loss = torch.pow(z - y, 2).sum()

To compute those derivatives, we call
``loss.backward()``, and then retrieve the values from ``w.grad`` and
``b.grad`` to get the derivatives $\frac{\partial loss}{\partial w}$ and
$\frac{\partial loss}{\partial b}$.


In [None]:
loss.backward()
print(w.grad)
print(b.grad)