# TENSORS
## Tensor initialisation and basic operations 

In [55]:
import torch
import numpy as np

In [21]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [22]:
# requires_grad=True asks autograd to track operations on this tensor so that
# gradients can be computed during backward propagation.
my_tensor = torch.tensor(
    [[1, 2, 3], [4, 5, 6]],
    dtype=torch.float32,
    device=device,
    requires_grad=True,
)

In [23]:
# Inspect the tensor together with its dtype, device and autograd flag.
print(my_tensor)
print(my_tensor.dtype)
print(my_tensor.device)
print(my_tensor.requires_grad)

tensor([[1., 2., 3.],
        [4., 5., 6.]], device='cuda:0', requires_grad=True)
torch.float32
cuda:0
True


In [24]:
# other common initialisations

x = torch.empty(size = (3, 3)) # memory is left uninitialised, so the values are arbitrary leftovers rather than random samples

In [25]:
torch.zeros((3, 3)) # 3x3 tensor filled with zeros

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])

In [26]:
torch.rand((3, 3)) # 3x3 tensor of samples drawn uniformly from [0, 1)

tensor([[0.4914, 0.3573, 0.9697],
        [0.8636, 0.0775, 0.0496],
        [0.3643, 0.4841, 0.4053]])

In [27]:
torch.ones((3, 3)) # 3x3 tensor filled with ones

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])

In [28]:
torch.eye(3, 3) # identity matrix: ones on the diagonal, zeros elsewhere

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])

In [29]:
torch.arange(start = 0, end = 5, step = 1) # end is exclusive: 0, 1, 2, 3, 4

tensor([0, 1, 2, 3, 4])

In [30]:
torch.linspace(start = 0.1, end = 1, steps = 10) # 10 evenly spaced values from 0.1 to 1, both endpoints included

tensor([0.1000, 0.2000, 0.3000, 0.4000, 0.5000, 0.6000, 0.7000, 0.8000, 0.9000,
        1.0000])

In [31]:
torch.empty(size = (1,5)).normal_(mean =0, std = 1) # fill in place with samples from a normal distribution (mean 0, std 1)

tensor([[-1.0626,  0.0582,  0.9236,  0.6054,  0.2655]])

In [33]:
torch.empty(size = (1, 5)).uniform_(0, 1) # fill in place with samples from a uniform distribution between 0 and 1

tensor([[0.3358, 0.4247, 0.9890, 0.3521, 0.8014]])

In [35]:
torch.diag(torch.ones(5)) # 5x5 matrix with the given vector of ones on its diagonal

tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]])

In [38]:
# initialise and convert between dtypes

tensor = torch.arange(4)
tensor.bool() # 0 becomes False, every non-zero value becomes True

tensor([False,  True,  True,  True])

In [50]:
tensor.short(), tensor.long() # short is int16, long is int64

(tensor([0, 1, 2, 3], dtype=torch.int16), tensor([0, 1, 2, 3]))

In [53]:
# NOTE(review): the recorded output shows a bare torch.float32 as the second
# element, which looks like it came from `tensor.float().dtype` - re-run this
# cell to refresh the output.
tensor.half(), tensor.float() # half is float16, float is float32

(tensor([0., 1., 2., 3.], dtype=torch.float16), torch.float32)

In [57]:
# NumPy array to tensor and back again

np_array = np.zeros((5, 5))
tensor = torch.from_numpy(np_array) # the tensor keeps NumPy's float64 dtype
np_array_back = tensor.numpy()

tensor, np_array_back

(tensor([[0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.]], dtype=torch.float64),
 array([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]]))

## Tensor Math and Comparison operations

In [58]:
# Two small integer vectors reused by the examples below.
x = torch.tensor([1, 2, 3])
y = torch.tensor([5, 6, 7])

In [65]:
z1 = torch.empty(3) # pre-allocated tensor that receives the result through out=

# addition: three ways to compute x + y

torch.add(x, y, out = z1) # writes the sum into z1

z2 = torch.add(x, y) # returns the sum as a new tensor

z = x + y # operator form

z

tensor([ 6,  8, 10])

In [69]:
# subtraction
z = x - y # element-wise difference
z

tensor([-4, -4, -4])

In [74]:
# division

z = torch.true_divide(x, y) # element-wise division; the integer inputs give a floating-point result
z

tensor([0.2000, 0.3333, 0.4286])

In [82]:
# in-place operations

t = torch.zeros(3)
t.add_(x) # a trailing underscore marks an in-place op: t is modified directly, no copy is created

t += x # also in place; writing t = t + x instead would allocate a new tensor

t

tensor([2., 4., 6.])

In [88]:
# exponent

x.pow(2) , x ** 2 # both forms raise every element to the power of 2

(tensor([1, 4, 9]), tensor([1, 4, 9]))

In [87]:
# comparison

x > 0 , x < 0 # element-wise comparisons return boolean tensors

(tensor([True, True, True]), tensor([False, False, False]))

In [114]:
# matrix multiplication

x1 = torch.rand((2, 5))
x2 = torch.rand((5, 6))

torch.mm(x1, x2), x1.mm(x2) # function and method form: (2, 5) times (5, 6) gives (2, 6)

(tensor([[1.5801, 1.3826, 1.0734, 0.6623, 0.8886, 1.3804],
         [0.8493, 0.7737, 0.6160, 0.4358, 0.4811, 0.9993]]),
 tensor([[1.5801, 1.3826, 1.0734, 0.6623, 0.8886, 1.3804],
         [0.8493, 0.7737, 0.6160, 0.4358, 0.4811, 0.9993]]))

In [115]:
# matrix exponent

matrix_exp = torch.rand((5, 5))
matrix_exp.matrix_power(3) # matrix_exp @ matrix_exp @ matrix_exp, whereas the ** operator raises each element to that power

tensor([[1.3694, 1.8318, 1.4789, 1.9286, 1.9843],
        [1.1742, 1.5292, 1.0768, 1.5680, 1.5190],
        [1.0820, 1.4455, 0.9650, 1.4467, 1.3559],
        [2.0911, 2.7951, 2.1933, 2.8093, 2.9095],
        [0.9083, 1.4471, 0.9832, 1.2958, 1.1885]])

In [117]:
# element-wise multiplication

x * y # multiplies matching elements: [1*5, 2*6, 3*7]

tensor([ 5, 12, 21])

In [118]:
# dot product

torch.dot(x , y) # sum of the element-wise products: 5 + 12 + 21 = 38

tensor(38)

In [119]:
# batch matrix multiplication: each of the `batch` (n, m) matrices is
# multiplied with the matching (m, p) matrix, giving `batch` (n, p) results

batch, n, m, p = 32, 10, 20, 30

tensor1 = torch.rand(batch, n, m)
tensor2 = torch.rand(batch, m, p)
out = tensor1.bmm(tensor2)
out.shape, out

(torch.Size([32, 10, 30]),
 tensor([[[5.2096, 6.5052, 5.5499,  ..., 5.4167, 5.2892, 4.4246],
          [3.8972, 5.5183, 4.0562,  ..., 4.8308, 4.9280, 4.2078],
          [2.8570, 3.7924, 3.6080,  ..., 4.2305, 3.6435, 3.6694],
          ...,
          [4.1879, 4.7268, 4.0195,  ..., 4.9499, 4.0482, 4.1607],
          [2.8936, 4.8215, 3.9551,  ..., 3.5609, 3.4895, 2.9048],
          [4.6333, 4.3567, 3.7461,  ..., 4.8359, 4.4842, 3.3589]],
 
         [[5.5389, 4.0957, 4.0988,  ..., 5.1820, 3.2790, 5.2206],
          [5.6933, 4.9825, 5.2936,  ..., 5.4932, 3.4088, 6.0425],
          [4.8927, 4.4651, 4.3271,  ..., 5.0856, 3.6357, 5.3072],
          ...,
          [7.1618, 5.5546, 5.8373,  ..., 6.6575, 4.6782, 6.7063],
          [6.6838, 4.1940, 5.5299,  ..., 5.7304, 4.4882, 5.8065],
          [6.2902, 4.7680, 5.8180,  ..., 5.8182, 4.3697, 5.3610]],
 
         [[3.5986, 5.1474, 3.8595,  ..., 5.2207, 4.9279, 4.8838],
          [4.3088, 4.6128, 4.6537,  ..., 5.7601, 5.3526, 5.1020],
          [4.

In [145]:
# examples of broadcasting

x1 = torch.rand((6, 5))
x2 = torch.rand((1, 5))

x1 - x2 # the single row of x2 is broadcast across all 6 rows of x1 before subtracting

tensor([[-0.2400, -0.5945,  0.6404, -0.0613, -0.8708],
        [ 0.3390, -0.1035,  0.6346, -0.1587, -0.1465],
        [-0.3589,  0.0587,  0.1696,  0.2667, -0.3610],
        [-0.0470, -0.4175,  0.1579,  0.0619, -0.1772],
        [-0.2514, -0.1955, -0.0064, -0.1187, -0.6842],
        [ 0.3260, -0.4713,  0.1500,  0.2487, -0.3629]])

In [146]:
x1 ** x2 # broadcasting also applies to element-wise powers; the result keeps x1's (6, 5) shape

tensor([[0.4822, 0.2256, 0.9847, 0.6874, 0.0066],
        [0.8873, 0.7066, 0.9843, 0.4449, 0.7573],
        [0.2886, 0.8360, 0.9393, 0.8700, 0.5580],
        [0.6597, 0.4236, 0.9371, 0.7821, 0.7293],
        [0.4685, 0.6291, 0.8728, 0.6028, 0.2341],
        [0.8810, 0.3680, 0.9356, 0.8639, 0.5563]])

In [154]:
# other useful operations

x1.shape, torch.sum(x1, dim = 1) # dim=1 sums across the columns, leaving one value per row

(torch.Size([6, 5]), tensor([1.0813, 2.7727, 1.9828, 1.7858, 0.9516, 2.0983]))

In [160]:
torch.max(x, dim = 0), torch.argmax(x, dim=0) # max with dim returns (values, indices); argmax returns only the index of the highest value

(torch.return_types.max(
 values=tensor(3),
 indices=tensor(2)),
 tensor(2))

In [162]:
torch.min(x, dim = 0), torch.argmin(x, dim = 0) # min with dim returns (values, indices); argmin returns only the index of the lowest value

(torch.return_types.min(
 values=tensor(1),
 indices=tensor(0)),
 tensor(0))

In [158]:
torch.abs(x) # element-wise absolute value

tensor([1, 2, 3])

In [163]:
torch.mean(x.float(), dim =0) # x holds integers, so it is converted to float before averaging

tensor(2.)

In [166]:
torch.eq(x, y) # element-wise equality test

tensor([False, False, False])

In [170]:
torch.sort(y, dim =0, descending = False) # ascending sort; returns the sorted values and their original indices

torch.return_types.sort(
values=tensor([5, 6, 7]),
indices=tensor([0, 1, 2]))

In [177]:
torch.clamp(x, min = 0) # any value below 0 is raised to 0

tensor([1, 2, 3])

In [179]:
x = torch.tensor([1, 0, 1, 1, 1], dtype = torch.bool) # 1 becomes True, 0 becomes False

In [181]:
torch.any(x), torch.all(x) # any: at least one element is True; all: every element is True

(tensor(True), tensor(False))

You can also call these as tensor methods, e.g. `x.any()`; the same method form exists for the other functions shown above.

## Tensor Indexing

In [184]:
# Dimensions of the example batch used in the indexing examples.
batch_size = 10
features = 25

In [185]:
x = torch.rand((batch_size, features)) # one row per sample, one column per feature

In [190]:
x[0].shape # same as x[0, :]: all features of the first sample

torch.Size([25])

In [193]:
x[:, 0].shape # the first feature of every sample

torch.Size([10])

In [195]:
x[2, 0:10] # 0:10 -> first 10 features of the third sample (row index 2)

tensor([0.5747, 0.8619, 0.0943, 0.9597, 0.8842, 0.9235, 0.3013, 0.2056, 0.8703,
        0.6982])

In [196]:
x[0, 0] = 100 # indexing can also be used to assign a single element

In [201]:
# fancy indexing: pick several positions at once with a list of indices
x = torch.arange(10)
indicies = [2, 5, 8]
print(x[indicies])

tensor([2, 5, 8])


In [216]:
# index with one tensor per dimension: element i of the result is
# x[rows[i], cols[i]], so this picks x[1, 4] and x[0, 0]
rows = torch.tensor([1, 0])
cols = torch.tensor([4, 0])

x = torch.rand(2, 5)

x, x[rows, cols]

(tensor([[0.1653, 0.8488, 0.1230, 0.3068, 0.2207],
         [0.9838, 0.6086, 0.0700, 0.8984, 0.4904]]),
 tensor([0.4904, 0.1653]))

In [219]:
x = torch.arange(10)
x[(x < 2) | (x > 8)] # boolean mask: keep the elements smaller than 2 or greater than 8

tensor([0, 1, 9])

In [220]:
x[x.remainder(2) == 0] # keeps the elements whose remainder after division by 2 is 0, i.e. the even ones

tensor([0, 2, 4, 6, 8])

In [221]:
# useful operations

torch.where(x > 5, x, x*2) # keep x where x > 5, otherwise use x * 2

tensor([ 0,  2,  4,  6,  8, 10,  6,  7,  8,  9])

In [222]:
torch.tensor([0,0,1,2,2,3,4]).unique() # drops the duplicate values

tensor([0, 1, 2, 3, 4])

In [223]:
x.ndimension() # number of dimensions: 1 here; a 5x5x5 tensor would return 3

1

In [224]:
x.numel() # total number of elements in x

10

## Tensor Reshaping

In [225]:
x = torch.arange(9) # the 9 values 0..8, reshaped in the cells below

In [235]:
x_3x3 = x.view(3, 3) # view only works when the tensor's memory layout allows it, e.g. on a contiguous tensor
x_3x3

tensor([[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]])

In [232]:
x_3x3 = x.reshape(3, 3) # reshape also works when the tensor is not contiguous
x_3x3

tensor([[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]])

In [251]:
x_3x3 = x.view(3, 3)
y = x_3x3.t() # transpose; read row by row it is [0, 3, 6, 1, 4, 7, 2, 5, 8]
# y.view(9) raises an error here: "view size is not compatible with input tensor's size and stride (at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead."
y.contiguous().view(9) # making the tensor contiguous first lets view succeed

tensor([0, 3, 6, 1, 4, 7, 2, 5, 8])

In [253]:
x_3x3 = x.reshape(3, 3)
y = x_3x3.t()
y.reshape(9) # reshape handles the transposed tensor without an explicit contiguous() call

tensor([0, 3, 6, 1, 4, 7, 2, 5, 8])

In [259]:
x1 = torch.rand((2, 5))
x2 = torch.rand((2, 5))
torch.cat((x1, x2), dim = 0).shape, torch.cat((x1, x2), dim = 1).shape # dim=0 joins along the rows -> (4, 5); dim=1 joins along the columns -> (2, 10)

(torch.Size([4, 5]), torch.Size([2, 10]))

In [261]:
z1 = x1.view(-1) # -1 flattens all 10 elements into a single dimension
z1 # a bare assignment displays nothing; end the cell with the value so the flattened tensor is shown

tensor([0.1152, 0.1457, 0.9178, 0.6553, 0.7435, 0.6459, 0.5085, 0.2058, 0.5520,
        0.3374])

In [267]:
# flatten everything except the batch dimension
batch = 64
x = torch.rand(batch, 2, 5)
z = x.view(x.size(0), -1) # -1 is inferred from the remaining sizes (2 * 5 = 10)
z.shape

torch.Size([64, 10])

In [271]:
z = x.permute(0, 2, 1) # reorders the dimensions; here the last two are swapped: (64, 2, 5) -> (64, 5, 2)
z.shape

torch.Size([64, 5, 2])

In [273]:
x = torch.arange(10)
x.unsqueeze(0).shape # adds a new dimension of size 1 at position 0

torch.Size([1, 10])

In [276]:
x.unsqueeze(1).shape # adds a new dimension of size 1 at position 1

torch.Size([10, 1])

In [286]:
x = torch.arange(10).unsqueeze(0).unsqueeze(1) # shape goes (10,) -> (1, 10) -> (1, 1, 10)
x.shape

torch.Size([1, 1, 10])

In [290]:
z = x.squeeze(0) # removes the size-1 dimension at position 0: (1, 1, 10) -> (1, 10)
z.shape

torch.Size([1, 10])