### Notes on torch.tensor operations

In [None]:
import torch
import numpy as np 

In [None]:
"""Maximum values of a tensor and where they occur along a chosen axis."""
import torch

x = torch.empty(5, 3)
# reducing over dim=1 yields a (values, indices) pair with one entry per row
values, indices = x.max(dim=1)

# without `dim`, the reduction covers every element and yields a single value
x = torch.randint(low=0, high=9, size=(2, 3, 4))
print(x)
print(x.max())

In [None]:
"""Turn a plain tuple into a torch.Size object."""
import torch

shape = (4, 5, 6)
print(shape)
# torch.Size wraps the same numbers in the type that tensors report as their shape
shape = torch.Size(shape)
print(shape)
# allocate an (uninitialised) tensor with that shape
torch.empty(shape)

In [None]:
"""Turn the torch.Size of a tensor into a plain tuple."""
import torch

x = torch.randn(5)
# x.size() returns the same torch.Size object as x.shape
tuple(x.size())

In [None]:
""" """

In [None]:
"""Reshape tensors, including adding an axis and inferring a size with -1."""
import torch

x = torch.randn(5)
print(x.shape)
# prepend a unit axis: (5,) -> (1, 5)
x1 = torch.reshape(x, (1, *x.shape))
print(x1.shape)

y = torch.randn(2, 3, 4)
print(y.shape)
# a lone -1 flattens the tensor to 1d
y1 = torch.reshape(y, (-1,))
print(y1.shape)
y2 = torch.reshape(y, (2, -1, 4))
print(y2.shape)
# -1 asks reshape to infer that axis; handy when the exact size is not known up front
y3 = torch.reshape(y, (2, -1))
print(y3.shape)

In [None]:
"""Round-trip between torch tensors and NumPy ndarrays."""

# tensor -> ndarray
x = torch.randn((5, 3))
y = x.numpy()

# ndarray -> tensor
z = torch.from_numpy(y)

In [None]:
"""Slice a tensor without losing a dimension."""
import torch

x = torch.randint(0, 10, (1, 5, 16))
print(x)
print(x[:, :2, :])
# an integer index such as x[:, 0, :] drops that dimension; use the i:j slice notation to keep it
print(x[:, 0])

In [None]:
"""Basic slicing of a tensor."""

import torch

x = torch.randint(0, 10, (3, 4, 5))
print(x)
# slice along dim=1; the trailing dimensions may be left out
print(x[:, 0:2])
# ... which is the same as spelling out the trailing full slice
assert torch.equal(x[:, 0:2], x[:, 0:2, :])
y = torch.randint(0, 10, (2, 3, 4))
print(y)
y[None].shape  # indexing with None adds a dummy dimension at dim=0

In [None]:
"""Cast a tensor to another data type."""

import torch

x = torch.randint(0, 10, (20,))
print(x.dtype)
# integer -> single precision float
x = x.to(torch.float32)
print(x.dtype)
# single -> double precision float
x = x.to(torch.float64)
print(x.dtype)

_note_:
* if `x.requires_grad` is True, calling `x.numpy()` raises an error; call `x.detach().numpy()` instead

In [None]:
"""Extract the Python number held by a single-element tensor with Tensor.item()."""

x = torch.tensor((1.0,))
x.item()

In [None]:
"""Query the size of a tensor."""
import torch

x = torch.randn((5, 3))
# size() and .shape both give a torch.Size holding the length of every axis
print(x.size())
print(x.shape)
# number of axes
print(x.ndim)

In [None]:
"""torch.sum(): reduce a tensor by summing along one dimension."""

x = torch.randn(5, 3, 4)
# keepdim defaults to False, so the reduced axis disappears
x1 = x.sum(dim=1)
print(x1.shape)
# keepdim=True leaves the reduced axis in place with length 1
x2 = x.sum(dim=1, keepdim=True)
print(x2.shape)
# no dim given: every element is summed into one scalar tensor
print(x.sum().shape)

In [None]:
""" detach(): get a tensor that is cut off from the compute graph """

x = torch.randn(5, 3, requires_grad=True)
print(x)
y = torch.sum(x * 2)
print(x.requires_grad, y.requires_grad)

# create z by detach()-ing y from the compute graph
z = y.detach()
print(y.requires_grad)          # y is un-modified
print(z.requires_grad)          # z.requires_grad set to False

# call backward() method on y, with requires_grad=True
y.backward()
print(x.grad)

# z and y share the same storage, any in-place change to z will update y
# (done after backward() so the gradient above is computed from the original value)
print(y)
print(z)
z += 1
print(z)
print(y)                        # y is also updated to +1


In [None]:
"""Apply an element-wise transformation to a tensor."""

x = torch.randn(5, 3)
print(x)

# exponentiate every element
x = x.exp()
print(x)

_note_:
* PyTorch has no fast, general way to apply a custom Python lambda element-wise to a tensor (`Tensor.apply_` exists, but it is CPU-only and slow)
  * a solution may be to convert to `np.ndarray` first, or to compose built-in element-wise functions

In [None]:
"""Conditional slicing with a boolean mask."""
import torch

# x: a 32x10 tensor (e.g., xent outputs where batch=32, num of classes = 10)
x = torch.rand(32, 10)
# labels: 32 integers in [0, 9] (e.g., each element is a correct label index)
labels = torch.randint(0, 10, (32,))
print(x)
print(labels)

# a max over dim=1 yields a pair of tensors: the row-wise maxima and their column positions
max_val, indices = x.max(dim=1)
# indexing a 1d tensor with a boolean tensor keeps only the entries where the condition is True
# (for higher dimensions use slicing syntax like [:, condition])
max_val[indices == labels].sum().item()

In [None]:
""" masked slicing conditioned on another tensor """
import torch

x = torch.zeros(32,10)                              # x: a 32x10 tensor (e.g., xent outputs where batch=32, num of classes = 10)
labels = torch.randint(0,10,(32,))                  # labels: 32 integers (e.g., each element is a correct label index)

# construct a 32x10 boolean mask, mask[i][j] = True if labels[i] == j -> use values in labels as indices
# comparing labels as a (32, 1) column against the (10,) row of class indices broadcasts to (32, 10),
# which avoids building the mask element by element in a Python loop
mask = labels.unsqueeze(1) == torch.arange(x.size(1))
# picks one element per row; the same values can be obtained with x.gather(1, labels.unsqueeze(1)).squeeze(1)
torch.masked_select(x, mask)

In [None]:
"""torch.index_select(tensor, dim, indices): pick entries along one dimension."""
import torch

x = torch.randint(0, 9, (3, 4))
print(x)
indices = torch.tensor([0, 2])
# columns 0 and 2
print(x.index_select(1, indices))
# rows 0 and 2
print(x.index_select(0, indices))

_note_:
* for `torch.masked_select`, `mask` must be a boolean tensor (`torch.BoolTensor`)

In [None]:
"""Compare two tensors for equality."""

x = torch.rand(3, 3)
y = torch.rand(3, 3)

# torch.equal() collapses the comparison into a single Python bool
print(torch.equal(x, y))
# == (torch.eq) compares element by element and gives a bool tensor
print(x == y)
# or reduce that bool tensor with all()
print((x == y).all())

z = x.clone()
print(torch.equal(x, z))

# True only if no elements are equal
print(torch.all(x != y))

# True if at least some elements are not equal
print(torch.any(x != y))

In [None]:
"""Indices of the non-zero and of the zero elements of a tensor."""

x = torch.rand(3, 3)
y = torch.rand(3, 3)
# make the last column identical so that x - y has zeros there
y[:, 2] = x[:, 2]
print(x)
print(y)

# indices of the non-zero elements, one index tensor per dimension
tup = torch.nonzero(x - y, as_tuple=True)
print(tup)
# the tuple can be used directly to index the tensor
print(x[tup])

# indices of the zero elements, again as a tuple
tup2 = torch.nonzero((x - y) == 0, as_tuple=True)
print(tup2)
print(x[tup2])

torch.stack() vs torch.cat()
* stack() joins a list of tensors along a new axis; the output tensor has one more axis than the input tensors
* cat() joins a list of tensors along an existing axis; the output tensor has the same number of axes as the input tensors
* stack() = unsqueeze() + cat()

In [None]:
"""torch.split() / cat() / stack() / transpose() / flatten() on one small tensor."""

import torch

t = torch.rand(1, 9, 1, 1)
print(t)
print(t.shape)
# cut dim=1 into chunks of length 3
lst = t.split(3, dim=1)
print(lst)
# cat along the same axis puts the chunks back together
a = torch.cat(lst, 1)
print('break')
print(a)
print(a.shape)

# stack inserts a new axis at dim=1 that indexes the chunks
x = torch.stack(lst, 1)
print(x)
print(x.shape)

# swap the chunk axis with the within-chunk axis
y = x.transpose(1, 2)
print(y)
print(y.shape)

# merge dims 1 and 2 into a single axis
z = y.flatten(1, 2)
print(z)
print(z.shape)

In [None]:
import numpy as np

x = torch.randint(low=0, high=10, size=(12,))
print(x)
y = x.numpy()
print(y)
# cut the 12-element array into 12 pieces
np.array_split(y, 12)

In [None]:
"""torch.stack() inserts a new dimension at the requested position."""

import torch

x1 = torch.rand(1)
x2 = torch.rand(1)
x3 = torch.rand(1)
lst = [x1, x2, x3]
print(x1, x2, x3, sep="\n")
# dim=0: the new dimension becomes the first axis
y1 = torch.stack(lst, dim=0)
print(y1.shape)
print(y1)
# dim=1: the new dimension becomes the second (and last) axis
y2 = torch.stack(lst, dim=1)
print(y2.shape)
print(y2)
# dim=-1 resolves to dim=1 here (second & last axis)
y3 = torch.stack(lst, dim=-1)
print(y3.shape)
print(y3)
# dim=-2 resolves to dim=0 here (first axis)
y4 = torch.stack(lst, dim=-2)
print(y4.shape)
print(y4)
# y5 = torch.stack(lst, dim=2)    # error: dim must be in range [-(x.dim()+1), x.dim()]; here x.dim()==1, so the range is [-2, 1]
# the rule is very simple:
# - `dim` must be in the above range
# - the new axis is added at `dim`

In [None]:
"""tensor([1]) is 1-d, tensor(1) is 0-d, and tensor(1.) is a 0-d float tensor."""

x1 = torch.tensor([1, 2, 3])
x2 = torch.tensor([1])
x3 = torch.tensor(1)
print(x2)
print(x3)
# one axis vs. no axis at all
print(x2.dim())
print(x3.dim())
# a Python float gives a floating-point tensor
x4 = torch.tensor(1.0)
print(x4)
print(x4.shape)
x5 = torch.tensor(1)
print(x5)
# reshape turns the 0-d tensor into a 1-d tensor with a single element
print(torch.reshape(x5, (1,)))
x3.shape

In [None]:
""" join zero-dimensional (scalar) tensors into one 1-d tensor """
import torch
lst = [torch.tensor(1), torch.tensor(1), torch.tensor(1)]
# torch.cat() joins along an existing axis and raises a RuntimeError for
# zero-dimensional tensors; torch.stack() creates the axis to join along instead
torch.stack(lst, dim=0)

In [None]:
"""Element-wise tensor multiplication with broadcasting."""
import torch

x = torch.randint(0, 10, (5, 3, 3))
y = torch.randint(0, 10, (5, 1, 1))
print(x)
print(y)
# shapes (5, 3, 3) and (5, 1, 1) multiply directly: y's length-1 axes are stretched to match x
torch.mul(x, y)

In [None]:
"""
torch.repeat_interleave(x, repeat, dim)

- repeats each entry of tensor x `repeat` times along `dim`
- a tensor `repeat` must have the same length as x along `dim`
- `dim` is optional; without it the input is flattened and the result is 1-d
"""

x = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])
print(x)

# no dim: every entry is repeated twice and the result comes back flattened
x_1 = x.repeat_interleave(2)
print(x_1)

# with dim=0: row x[i] is repeated repeat[i] times
x_2 = x.repeat_interleave(torch.tensor([1, 2, 3, 4]), dim=0)
print(x_2)

In [None]:
"""Create random tensors of a given shape."""

# integers; each entry within range [0, 9]; shape = (2, 2, 4)
x = torch.randint(low=0, high=10, size=(2, 2, 4))

# floats of shape (5, 3, 4) sampled from N(0, 1)
y_n = torch.randn((5, 3, 4))
# floats of shape (5, 3, 4) sampled uniformly from [0, 1)
y_p = torch.rand((5, 3, 4))

In [None]:
""" return data type and number of dimensions of a tensor """
import torch
x = torch.randn(5, 3)
print(x.dtype)      # data type of the elements
len(x.shape)        # number of dimensions, same value as x.dim()

In [None]:
"""
Enumerate a tensor.

- iterating a tensor walks over its slices along dim=0
"""

x = torch.randn(5, 3, 4)
print(x)

# unbind(dim=0) yields the same 3x4 slices that plain iteration over x does
for count, matrix in enumerate(x.unbind(dim=0)):
    print(count)
    print(matrix)

In [None]:
"""
torch.bmm(input1, input2)

- batched matrix-matrix product
- input1 is b x n x m, input2 is b x m x p; the result is b x n x p
"""

# (2, 1, 3) @ (2, 3, 2) -> (2, 1, 2)
torch.ones(2, 1, 3).bmm(torch.ones(2, 3, 2))

In [None]:
"""
torch.arange(start=0, end, step=1)

- generates a 1-D tensor holding an arithmetic sequence

note:
- start=0 and step=1 are the defaults, so these two arguments are optional
"""

import torch

# start=1, end=10, step=2
x = torch.arange(start=1, end=10, step=2)
print(x)
# only end=10 given, so start=0 and step=1
y = torch.arange(10)
print(y)
z = torch.randint(0, 10, (2, 3))
print(z)
# one index per column of z
print(torch.arange(z.size(1)))

In [None]:
"""Product of the elements of a tensor along one dimension."""

x = torch.randint(0, 10, (2, 2))
print(x)
# multiply down the rows: one product per column
print(x.prod(dim=0))
# multiply across the columns: one product per row
print(x.prod(dim=1))

In [None]:
"""Tensor.repeat(): tile a tensor along each dimension."""
import torch
x = torch.randint(0, 10, (2, 2))
print(x)
# tile 2 times along dimension=0 and 3 times along dimension=1
x.repeat((2, 3))

In [None]:
"""torch.clamp limits every element of the input tensor to a given range."""

import torch

x = torch.randint(0, 10, (3, 4, 5))
print(x)
# values below 3 become 3, values above 6 become 6
y = x.clamp(min=3, max=6)
print(y)

* converting a `numpy.dtype` to a `torch.dtype` is problematic
* see [this post](https://discuss.pytorch.org/t/converting-a-numpy-dtype-to-torch-dtype/52279)

In [None]:
""" torch.tensor vs torch.from_numpy() """

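# from_numpy() expects an np.ndarray, so it does not work for Python scalars
# otherwise the two methods yield tensors with identical values? note: from_numpy() shares memory with the ndarray, whereas torch.tensor() copies the data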

In [None]:
"""torch.BoolTensor is instantiated from a sequence of values."""
import torch

# ten True entries
x = torch.BoolTensor([True] * 10)
print(x)

y = torch.BoolTensor((3,))
print(y)

In [None]:
""" use einops """

from einops import rearrange

# NOTE(review): `batch` is not defined in this cell; presumably a (data, labels) pair created elsewhere — confirm
data, labels = batch
print(data.shape)
# the pattern names exactly three axes (c, h, w) and moves the first one to the end
# NOTE(review): whether data[0:1, :3] has three axes depends on the shape of `data` — confirm
a = rearrange(data[0:1, :3], 'c h w -> h w c')
print(a.shape)
# move c back in front of h, w and add a leading axis of length 1, written `()`
b = rearrange(a, 'h w c -> () c h w')
print(b.shape)
# merge c, h, w into a single axis, keeping the leading axis
c = rearrange(b, 'b c h w -> b (c h w)')
print(c.shape)
# drop the leading length-1 axis
d = rearrange(c, '() classes -> classes')
print(d.shape)

In [None]:
"""torch.unsqueeze(): insert a dimension of length 1."""

import torch

x = torch.tensor([1, 2, 3, 4])
print(x.shape)
# new axis in front: (4,) -> (1, 4)
x1 = torch.unsqueeze(x, 0)
print(x1)
print(x1.shape)
# new axis at the back: (4,) -> (4, 1)
x2 = torch.unsqueeze(x, 1)
print(x2)
print(x2.shape)
# -1 also addresses the position after the last axis
x3 = torch.unsqueeze(x, -1)
print(x3)
print(x3.shape)

In [None]:
import torch

x = torch.tensor([0, 1, 0, 1])
print(x.dim())
print(x.shape)
# two new leading axes and two new trailing axes: (4,) -> (1, 1, 4, 1, 1)
x = x[None, None, :, None, None]
print(x)
print(x.shape)

In [None]:
"""A few special tensor methods."""

import torch

x = torch.rand((2, 2))
print(x)

# element-wise sign of the tensor
print(torch.sign(x))
# matrix transpose (dim <= 2)
print(torch.t(x))

In [None]:
"""Element-wise operators broadcast their operands."""
import torch
import einops

# broadcasting rules:
# 1. shapes are aligned starting from the trailing dimension; a tensor with fewer dimensions is treated as if it had leading singleton dimensions;
# 2. in each aligned position the sizes must be equal, or one of them must be 1;
# so to apply a mask along one specific axis, a simple recipe is to give the mask:
# 1) the same .dim() as x; 2) the same size as x along that axis; 3) size 1 (singleton) along every other axis

x = torch.randint(0, 9, (2, 3, 4))
print(x)
print(x.shape)
mask = torch.tensor([0, 1, 0])
# (3,) -> (1, 3, 1): lines the mask up with dim=1 of x
mask1 = mask[None, :, None]
print(mask1)
print(mask1.shape)
print(x * mask1)
# the same (1, 3, 1) mask built with einops
mask2 = einops.repeat(mask, 'h -> n1 h n2', n1=1, n2=1)
print(mask2)
print(x * mask2)


In [None]:
""" torch.norm: now deprecated; use torch.linalg.norm instead """

In [None]:
"""Check whether every element of a tensor satisfies a condition."""

import torch

x = torch.randint(0, 10, (2, 3, 4))
print(x)
# the comparison gives a bool tensor; all() reduces it to a single value
print((x >= 0).all())

y = torch.randn((2, 3, 4))
print(y)
print((y >= 0).all())

In [None]:
"""Build an identity matrix."""

import torch

# a vector of ones placed on the diagonal
x = torch.ones(3)
x.diag()

# a better way is to use torch.eye()
torch.eye(5)

In [None]:
"""torch.unbind(): split a tensor into a tuple of slices."""

# returns a tuple of tensors in which the dimension `dim` has been removed

import torch

x = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(x)
# one tensor per column
print(torch.unbind(x, dim=1))

y = torch.randint(0, 10, (2, 3, 4))
print(y.shape)
from einops import rearrange
# split the last axis of length 4 into (d, j) with j=2: (2, 3, 4) -> (2, 3, 2, 2)
y1 = rearrange(y, '... (d j) -> ... d j', j=2)
print(y1.shape)
print(y)
print(y1)
# removing the last axis (length 2) leaves two tensors
z1, z2 = torch.unbind(y1, dim=-1)
print(z1)
print(z2)

In [None]:
"""Tensor.expand(): repeat a tensor along new or length-1 dimensions."""

import torch

x = torch.randint(0, 10, (5,))
print(x)
print(x.shape)
# (5,) -> (2, 5): the vector appears in both rows
y = x.expand(2, 5)
print(y)
print(y.shape)

# expand(1, -1) has the same effect as adding a dummy dimension at shape[0]; -1 keeps that size unchanged
z = x.expand(1, -1)
print(z)
print(z.shape)

In [None]:
x = 1, 2, 3, 4
# the tuple as a single object
print(x)
# the star unpacks it into four separate arguments
print(*x)

In [None]:
import torch
import numpy as np

x = torch.tensor([[1, 2], [3, 4]]).numpy()
y = torch.tensor([5, 6, 7]).numpy()

# outer product: every entry of x times every entry of y, (2, 2) x (3,) -> (2, 2, 3)
z = torch.from_numpy(np.multiply.outer(x, y))
print(z.shape)
print(z)

In [None]:
import torch

x = torch.randint(0, 10, (1, 2, 5, 10))
y = torch.randint(0, 10, (1, 2, 3, 10))

a = 2 ** 2
print(a)

print(x * a)

# contract the shared last axis k: (..., i, k) with (..., j, k) -> (..., i, j)
torch.einsum('...ik, ...jk -> ...ij', x * 2, y)

In [None]:
"""Tensor slicing: pick out the even / odd positions along the last axis."""
import torch
from einops import rearrange

x = torch.randint(0, 10, (4, 6))
print(x)
a = x[:, ::2]       # even positions: columns 0, 2, 4
b = x[:, 1::2]      # odd positions: columns 1, 3, 5
print(a)
print(b)
print(x.shape)
print(a.shape)

In [None]:
""" several ways to interleave two tensors. """
import torch

a = torch.ones([2, 3, 4])
b = torch.zeros([2, 3, 4])
print(a.shape)
# interleave along dim = 1: pair up the slices taken along dim=1, stack each pair on a new
# axis at that same position, then cat the pairs along it -> shape [2, 6, 4]
pairs = (torch.stack(t, dim=1) for t in zip(a.unbind(dim=1), b.unbind(dim=1)))
interleaved_dim1 = torch.cat(list(pairs), dim=1)
print(interleaved_dim1)
# interleave along dim = 2: same recipe with dim=2 throughout -> shape [2, 3, 8]
pairs = (torch.stack(t, dim=2) for t in zip(a.unbind(dim=2), b.unbind(dim=2)))
interleaved_dim2 = torch.cat(list(pairs), dim=2)
print(interleaved_dim2)

# there is probably no need to do unbind, can just stack directly then view;
a = torch.ones([3, 4])
b = torch.zeros([3, 4])
print(torch.stack((a, b), dim=2).view(3, 8))    # interleave the columns
print(torch.stack((a, b), dim=1).view(6, 4))    # interleave the rows

# or simply use rearrange (a list of tensors is treated as stacked along a new first axis `t`);
# note that to interleave, `t` must always come last within the brackets, e.g., (w t), not (t w)
from einops import rearrange
a = torch.ones([3, 4])
b = torch.zeros([3, 4])
print(rearrange([a, b], 't h w -> h (w t)'))
print(rearrange([a, b], 't h w -> (h t) w'))
# how about more dimensions?
a = torch.ones([2, 3, 4])
b = torch.zeros([2, 3, 4])
print(rearrange([a, b], 't c h w -> c h (w t)'))    # interleave along dim = 2, shape = [2,3,8]
print(rearrange([a, b], 't c h w -> c (h t) w'))    # interleave along dim = 1, shape = [2,6,4]
print(rearrange([a, b], 't c h w -> (c t) h w'))    # interleave along dim = 0, shape = [4,3,4]

In [None]:
"""Combine two tensors and interleave them along the last dimension."""
import torch

a = torch.ones([2, 3])
b = torch.zeros([2, 3])
# stack on a new last axis, (2, 3, 2), then merge the last two axes: each row becomes a0, b0, a1, b1, ...
torch.stack([a, b], dim=2).view(2, 6)

In [None]:
"""Tensor.contiguous()"""
import torch

x = torch.randint(low=0, high=9, size=(2, 3, 4))
x1 = x.contiguous()

# see this post: https://stackoverflow.com/questions/48915810/pytorch-what-does-contiguous-do
# .contiguous() gives a tensor whose memory layout follows its shape; data is copied only when
# the tensor is not laid out that way already
# normally there is no need to call it explicitly


In [None]:
"""torch.ones_like(): a tensor of ones that mirrors another tensor."""
import torch

x = torch.randint(0, 9, (2, 3, 4))
print(x.shape)
y = torch.ones_like(x)
print(y)
print(y.shape)
# spelled out, ones_like() copies the size, dtype, layout and device of x:
z = torch.ones(x.shape, dtype=x.dtype, layout=x.layout, device=x.device)
print(z)

In [None]:
import torch

batch_size = 3
seq_len = 10
num_heads = 5

mask = torch.ones((batch_size, seq_len))
print(mask.shape)
# (batch, seq) -> (batch, 1, seq) -> (batch, heads, seq) -> (batch * heads, seq)
mask = mask.int().unsqueeze(1).repeat(1, num_heads, 1).reshape(batch_size * num_heads, seq_len)
print(mask.shape)

In [None]:
""" use tensor slicing to reshape, reduce or broadcast tensors """
import torch

x = torch.ones((10, 3, 5, 12))
print(x.shape)
x1 = x[:, 0, 0, :]
print(x1.shape)
x2 = x[:, 0, 1, :]      # if a dim is a single integer, that dimension is reduced in returned tensor
print(x2.shape)
x3 = x2[:, None, :]     # use an added dimension w/t `None` to add a dummy dimension
print(x3.shape)
x4 = x[:, 0, 0:1, :]    # or use i:j slicing notation to avoid reducing that dimension
print(x4.shape)

y = torch.ones(2, 3)
print(y.shape)
# a tensor takes at most one index per dimension, so a 2-d tensor takes two (a third raises IndexError);
# a slice end past the length of the axis is clamped, so this keeps all 3 columns
y1 = y[:, :10]
print(y1.shape)

In [None]:
"""Compare a tensor with 0 element-wise."""
import torch

x = torch.randn(2, 3, 4)
print(x)
# the comparison gives a bool tensor of the same shape
x1 = x.gt(0)
print(x1)
# as integers: False -> 0, True -> 1
print(x1.to(torch.int32))

In [None]:
"""Convert lists of numbers with torch.tensor() and stack the results."""
# note: this is torch.tensor() with a lower-case `t`, not torch.Tensor() !
import torch

x = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
# one int32 tensor per inner list
x = [torch.tensor(row, dtype=torch.int32) for row in x]
print(x)
# stack the three length-3 tensors into one 3x3 tensor
torch.stack(x)

In [None]:
""" stack a list of tensors with padding to the max length of the tensors. """