In [1]:
import torch
import numpy as np

## Create tensor from list or numpy array

In [10]:
# Create a tensor from a Python list of integers
a = [1,2,3]
t_a = torch.tensor(a)
print(t_a)
# Indexing yields a 0-dim tensor (printed as tensor(1)), not a plain Python number
print(t_a[0])
# Fetch an element as a plain Python number with .item()
print(t_a[0].item())
# Convert the whole tensor to a (nested) Python list
print(t_a.tolist())
# Convert to a numpy array
print(t_a.numpy())
# Integers coming from a Python list are stored as torch.int64
print(t_a.dtype)

tensor([1, 2, 3])
tensor(1)
1
[1, 2, 3]
[1 2 3]
torch.int64


In [14]:
# Create a tensor from a numpy array
b = np.array([4,5,6])
t_b = torch.tensor(b)
# The non-default dtype is shown explicitly in the printed tensor
print(t_b)
# The tensor keeps the dtype of the source array. In the recorded run that is int32;
# NOTE(review): numpy's default integer dtype is platform-dependent, so this may print int64 elsewhere
print(b.dtype)
print(t_b.dtype)

tensor([4, 5, 6], dtype=torch.int32)
int32
torch.int32


About types:   
The default element types after conversion from a list and from a NumPy array are different. The reason is the difference between the two sources. The integer elements of the list 'a' are Python ints (this shows as 'int' if we print their type), and torch.tensor stores Python ints as torch.int64. On the other hand, the default type of the elements in the NumPy array 'b' is int32, at least on this laptop (NumPy's default integer type depends on the platform and the NumPy version). torch.tensor does not change the data type of a NumPy array. It converts the original type to the corresponding type in torch: int64-->torch.int64, int32-->torch.int32  

In [16]:
# We can specify the data type of the numpy array manually; torch.tensor then keeps it
c = np.array([4,5,6], dtype=np.int64)
t_c = torch.tensor(c)
# When printing t_c, the dtype is not shown: int64 is the dtype torch uses by default
# for integer tensors, and print only mentions non-default dtypes
print(t_c)
print(t_c.dtype)

tensor([4, 5, 6])
torch.int64


In [18]:
# Tensor whose elements are all 1's, with shape (2, 3)
# Without a dtype argument the result is float32 (see the printed dtype)
t_ones = torch.ones(2,3)
print(t_ones)
print(t_ones.dtype)

# Choose the type manually (here the shape is passed as a tuple; both spellings are accepted)
# int64: elements print without a decimal point
t_ones = torch.ones((2,3), dtype=torch.int64)
print(t_ones)
print(t_ones.dtype)
# float64: not the default float type, so the dtype is shown when the tensor is printed
t_ones = torch.ones((2,3), dtype=torch.float64)
print(t_ones)
print(t_ones.dtype)

tensor([[1., 1., 1.],
        [1., 1., 1.]])
torch.float32
tensor([[1, 1, 1],
        [1, 1, 1]])
torch.int64
tensor([[1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64)
torch.float64


In [16]:
# A random tensor of shape (2, 3)
# Each element is sampled uniformly from [0, 1)
# No seed is set before this cell, so the printed values change from run to run
t_rand = torch.rand(2,3)
print(t_rand)
# The result is float32
print(t_rand.dtype)

tensor([[0.4436, 0.6921, 0.8738],
        [0.1679, 0.3789, 0.4707]])
torch.float32


## Change data types

In [19]:
# t_a is the int64 tensor created from the list in an earlier cell
print(t_a.dtype)
# .to(dtype) gives a tensor with the requested element type; it is kept under a new name
t_a_int32 = t_a.to(torch.int32)
print(t_a_int32)

torch.int64
tensor([1, 2, 3], dtype=torch.int32)


## Change shape

In [20]:
# Transpose a random (2, 3) tensor into a (3, 2) tensor
t = torch.rand(2,3)
t_transpose = torch.transpose(t, 0, 1) # Switch dimension 0 and 1.
print(t)
print(t_transpose)
# The two shapes are mirror images of each other: (2, 3) versus (3, 2)
print(t.shape)
print(t_transpose.shape)

tensor([[0.2194, 0.8422, 0.9596],
        [0.4180, 0.2855, 0.7158]])
tensor([[0.2194, 0.4180],
        [0.8422, 0.2855],
        [0.9596, 0.7158]])
torch.Size([2, 3])
torch.Size([3, 2])


In [21]:
# torch.transpose does not copy the original tensor
# It just provides another way of indexing the same data
t = torch.ones(2,3)
t_transpose = torch.transpose(t, 0, 1)
print(t)
print(t_transpose)
# Write through the transposed tensor: element [1,1] of the (3, 2) view ...
t_transpose[1,1] = 10
print(t_transpose)
# ... is the same storage location as element [1,1] of t, so t now shows 10 as well
print(t)

tensor([[1., 1., 1.],
        [1., 1., 1.]])
tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])
tensor([[ 1.,  1.],
        [ 1., 10.],
        [ 1.,  1.]])
tensor([[ 1.,  1.,  1.],
        [ 1., 10.,  1.]])


In [23]:
# Another way of transposing: the tensor method t.transpose(dim0, dim1)
t = torch.ones(2,3)
t_transpose = t.transpose(0,1)
print(t)
print(t_transpose)
# The method form also shares data with t instead of copying it:
# the write below is visible in both t and t_transpose
t_transpose[1,1] = 10
print(t)
print(t_transpose)

tensor([[1., 1., 1.],
        [1., 1., 1.]])
tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])
tensor([[ 1.,  1.,  1.],
        [ 1., 10.,  1.]])
tensor([[ 1.,  1.],
        [ 1., 10.],
        [ 1.,  1.]])


In [25]:
# Reshape a 1-D tensor with 6 elements into a (2, 3) tensor
t = torch.ones(2*3)
t_reshape = t.reshape(2,3)
print(t)
print(t_reshape)
# In this case the reshaped tensor shares its data with t: the write below shows up in both
# NOTE(review): reshape may return a copy instead of a view for other memory layouts -
# confirm against the torch.reshape documentation before relying on the sharing
t_reshape[0,0] = 100
print(t)
print(t_reshape)

tensor([1., 1., 1., 1., 1., 1.])
tensor([[1., 1., 1.],
        [1., 1., 1.]])
tensor([100.,   1.,   1.,   1.,   1.,   1.])
tensor([[100.,   1.,   1.],
        [  1.,   1.,   1.]])


In [36]:
# Squeeze demo: dropping singleton (size-1) dimensions from a tensor's shape
t = torch.zeros(1,2,1,4,1)
print(t.shape)
# With no dim argument, every size-1 dimension is removed
t_squeeze = torch.squeeze(t)
print(t_squeeze.shape)
# Restrict the removal to a single dimension, or to a tuple of dimensions
for squeeze_dims in (0, 2, 4, (0, 2)):
    print(torch.squeeze(t, squeeze_dims).shape)
# Dimension 1 has size 2, so asking to squeeze it leaves the shape unchanged
print(torch.squeeze(t, 1).shape)

torch.Size([1, 2, 1, 4, 1])
torch.Size([2, 4])
torch.Size([2, 1, 4, 1])
torch.Size([1, 2, 4, 1])
torch.Size([1, 2, 1, 4])
torch.Size([2, 4, 1])
torch.Size([1, 2, 1, 4, 1])


## Mathematical operations

In [37]:
# Setting the random seed to a fixed value ensures that the same sequence of random numbers will be generated every time the code is run.
torch.manual_seed(1)
# Create a (5, 2) tensor whose entries are in [-1, 1)
# torch.rand returns a tensor whose entries are from the uniform distribution on [0,1);
# multiplying by 2 and subtracting 1 maps that interval onto [-1, 1)
t1 = 2 * torch.rand(5,2) - 1
print(t1)
# Create a (5, 2) tensor whose entries follow the standard normal distribution (mean 0, std 1)
t2 = torch.normal(mean=0, std=1, size=(5,2))
print(t2)

tensor([[ 0.5153, -0.4414],
        [-0.1939,  0.4694],
        [-0.9414,  0.5997],
        [-0.2057,  0.5087],
        [ 0.1390, -0.1224]])
tensor([[ 0.8590,  0.7056],
        [-0.3406, -1.2720],
        [-1.1948,  0.0250],
        [-0.7627,  1.3969],
        [-0.3245,  0.2879]])


In [39]:
# 3 ways of computing the elementwise product of t1 and t2 (both defined in the previous cell)
# All three print the same (5, 2) result
# 1) the * operator
print( t1 * t2)
# 2) torch.mul
print( torch.mul(t1, t2))
# 3) torch.multiply
print( torch.multiply(t1, t2) )

tensor([[ 0.4426, -0.3114],
        [ 0.0660, -0.5970],
        [ 1.1249,  0.0150],
        [ 0.1569,  0.7107],
        [-0.0451, -0.0352]])
tensor([[ 0.4426, -0.3114],
        [ 0.0660, -0.5970],
        [ 1.1249,  0.0150],
        [ 0.1569,  0.7107],
        [-0.0451, -0.0352]])
tensor([[ 0.4426, -0.3114],
        [ 0.0660, -0.5970],
        [ 1.1249,  0.0150],
        [ 0.1569,  0.7107],
        [-0.0451, -0.0352]])


In [40]:
# Mean along a given dimension of t1, the (5, 2) tensor defined in an earlier cell
# `dim` is the keyword documented by PyTorch and the one used by the other cells in this notebook
# dim=0 averages over the rows: one value per column (2 values)
print(torch.mean(t1, dim=0))
# dim=1 averages over the columns: one value per row (5 values)
print(torch.mean(t1, dim=1))
# Mean of all entries (a 0-dim tensor)
print(torch.mean(t1))

tensor([-0.1373,  0.2028])
tensor([ 0.0369,  0.1378, -0.1709,  0.1515,  0.0083])
tensor(0.0327)


In [42]:
# Matrix multiplication
# t1 and t2 are both (5, 2), so one of them has to be transposed first
# using torch.matmul: (5, 2) x (2, 5) -> (5, 5)
print( torch.matmul(t1, torch.transpose(t2, 0, 1)) )
# using @ (same result as torch.matmul)
print(t1 @ torch.transpose(t2, 0, 1))

# To transpose t2, we can also use t2.transpose(0, 1)
print( torch.matmul(t1, t2.transpose(0, 1)) )
# Transposing t1 instead gives a different product: (2, 5) x (5, 2) -> (2, 2)
print( torch.matmul(t1.transpose(0, 1), t2) )

tensor([[ 0.1312,  0.3860, -0.6267, -1.0096, -0.2943],
        [ 0.1647, -0.5310,  0.2434,  0.8035,  0.1980],
        [-0.3855, -0.4422,  1.1399,  1.5558,  0.4781],
        [ 0.1822, -0.5771,  0.2585,  0.8676,  0.2132],
        [ 0.0330,  0.1084, -0.1692, -0.2771, -0.0804]])
tensor([[ 0.1312,  0.3860, -0.6267, -1.0096, -0.2943],
        [ 0.1647, -0.5310,  0.2434,  0.8035,  0.1980],
        [-0.3855, -0.4422,  1.1399,  1.5558,  0.4781],
        [ 0.1822, -0.5771,  0.2585,  0.8676,  0.2132],
        [ 0.0330,  0.1084, -0.1692, -0.2771, -0.0804]])
tensor([[ 0.1312,  0.3860, -0.6267, -1.0096, -0.2943],
        [ 0.1647, -0.5310,  0.2434,  0.8035,  0.1980],
        [-0.3855, -0.4422,  1.1399,  1.5558,  0.4781],
        [ 0.1822, -0.5771,  0.2585,  0.8676,  0.2132],
        [ 0.0330,  0.1084, -0.1692, -0.2771, -0.0804]])
tensor([[ 1.7453,  0.3392],
        [-1.6038, -0.2180]])


In [48]:
# Norms of t1, the (5, 2) tensor defined in an earlier cell
# Frobenius norm (the default norm, the square root of the sum of the squares of the elements)
print( torch.linalg.norm(t1) )
# ord=1 on a 2-D tensor without `dim`: this is the matrix 1-norm, i.e. the largest
# absolute column sum (checked in the next cell), not the sum of all absolute values
print( torch.linalg.norm(t1, ord=1) )
# ord=1 along a specific dimension: sum of absolute values along that dimension
print( t1.shape )
# dim=0: one value per column (2 values); their maximum equals the matrix 1-norm above
print( torch.linalg.norm(t1, ord=1, dim=0) )
# dim=1: one value per row (5 values)
print( torch.linalg.norm(t1, ord=1, dim=1) )

tensor(1.5165)
tensor(2.1417)
torch.Size([5, 2])
tensor([1.9953, 2.1417])
tensor([0.9566, 0.6632, 1.5412, 0.7145, 0.2615])


In [56]:
# Validation of the norms by computing them by hand
# Validate the Frobenius norm: square root of the sum of the squared elements
print("Frobenius direct calculation", torch.sqrt(torch.sum(t1 * t1)))
print("Frobenius", torch.linalg.norm(t1) )

# Validate the norm returned for ord=1 on a 2-D tensor (the matrix 1-norm)
# It is not the sum of the absolute values of all elements
# It is: torch.max(torch.sum(torch.abs(t1), dim=0))
# i.e.: max(sum(abs(t1), dim=0))
# Namely, we take absolute values of all elements, then add along the first dimension (along each column), then we select the maximum column sum
# NOTE(review): the label printed below says "Sum of absolute values", but the value is the
# maximum absolute column sum; consider renaming the label when the cell is re-run
print("Sum of absolute values", torch.max( torch.sum( torch.abs(t1), dim=0 ) )  )
print("L1", torch.linalg.norm(t1, ord=1))


Frobenius direct calculation tensor(1.5165)
Frobenius tensor(1.5165)
Sum of absolute values tensor(2.1417)
L1 tensor(2.1417)


In [57]:
# Another example of ord=1 on a 2-D tensor (matrix 1-norm)
t = torch.tensor([[1,2],[3,4]], dtype=torch.float32)
print(t)
print(torch.linalg.norm(t, ord=1))
# The result is 6, not the sum of all absolute values (1+2+3+4 = 10):
# the absolute column sums are 1+3 = 4 and 2+4 = 6, and the larger one is returned.

tensor([[1., 2.],
        [3., 4.]])
tensor(6.)


## Split and join tensors

In [71]:
# No seed is set in this cell, so the printed values change from run to run
t = torch.rand(6)
print(t)
# Split into three parts of 2 elements each
print( torch.chunk(t, 3), '\n' )
# When the number of chunks cannot divide the total number of elements, the result can be surprising
# The following code asks for 4 parts. However, the result is a tuple of only 3 tensors
# The outputs of this cell are consistent with a chunk size of ceil(n / chunks):
# with 6 elements and 4 requested chunks the size is ceil(6/4) = 2,
# and 6 elements are used up after 3 chunks of 2, so no fourth chunk is produced.
# NOTE(review): confirm the exact rule against the torch.chunk documentation
print( torch.chunk(t, 4), '\n' )

# The last tensor of the split may be smaller than the others
# 7 elements: 2 chunks -> sizes 4, 3; 3 chunks -> sizes 3, 3, 1; 4 chunks -> sizes 2, 2, 2, 1
print( torch.chunk(torch.rand(7), 2) )
print( torch.chunk(torch.rand(7), 3) )
print( torch.chunk(torch.rand(7), 4) )


tensor([0.4828, 0.0281, 0.1782, 0.2079, 0.2861, 0.8555])
(tensor([0.4828, 0.0281]), tensor([0.1782, 0.2079]), tensor([0.2861, 0.8555])) 

(tensor([0.4828, 0.0281]), tensor([0.1782, 0.2079]), tensor([0.2861, 0.8555])) 

(tensor([0.3366, 0.1264, 0.6924, 0.6601]), tensor([0.8238, 0.2413, 0.6084]))
(tensor([0.3180, 0.3877, 0.1015]), tensor([0.2721, 0.3469, 0.7138]), tensor([0.5913]))
(tensor([0.6235, 0.9991]), tensor([0.9873, 0.8410]), tensor([0.5159, 0.1541]), tensor([0.8908]))


In [73]:
# Specify the size of each chunk explicitly with a list: here 3 elements, then 2
# No seed is set in this cell, so the printed values change from run to run
t = torch.rand(5)
print(t)
print( torch.split(t, split_size_or_sections=[3,2]) )
# Specify one size for all chunks with an int; the last chunk may be smaller
# (5 elements with size 2 -> chunks of 2, 2 and 1)
print( torch.split(t, split_size_or_sections=2) )

tensor([0.9595, 0.0677, 0.1103, 0.4830, 0.2296])
(tensor([0.9595, 0.0677, 0.1103]), tensor([0.4830, 0.2296]))
(tensor([0.9595, 0.0677]), tensor([0.1103, 0.4830]), tensor([0.2296]))


In [78]:
# Concatenate two 1-D tensors end to end along their only dimension (0)
A, B = torch.ones(3), torch.zeros(2)
C = torch.cat((A, B), axis=0)
# One tensor per output line: A, then B, then the joined result
print(A, B, C, sep="\n")

tensor([1., 1., 1.])
tensor([0., 0.])
tensor([1., 1., 1., 0., 0.])


In [79]:
# Stack: join equal-length 1-D tensors along a newly created dimension
A, B = torch.ones(3), torch.zeros(3)
print(A, B, sep="\n")
# New dimension at position 1: A and B become the columns of a (3, 2) tensor
C = torch.stack((A, B), axis=1)
print(C)
# New dimension at position 0: A and B become the rows of a (2, 3) tensor
C = torch.stack((A, B), axis=0)
print(C)

tensor([1., 1., 1.])
tensor([0., 0., 0.])
tensor([[1., 0.],
        [1., 0.],
        [1., 0.]])
tensor([[1., 1., 1.],
        [0., 0., 0.]])


In [77]:
# Explanation from ChatGPT
# torch.cat concatenates a sequence of tensors along an existing dimension
# torch.stack joins a sequence of tensors along a new dimension
# Therefore, C = torch.cat([A,B], axis=1) will not work here, since A and B are 1-D tensors and have no dimension 1
# But torch.stack([A,B], axis=1) will work, because stack creates dimension 1 itself

In [80]:
# To pile two row vectors on top of each other with torch.cat, they must already be 2-D tensors of shape (1, 3)
E, F = torch.tensor([[1,2,3]]), torch.tensor([[4,5,6]])
# Joining along the existing dimension 0 yields a (2, 3) tensor
G = torch.cat((E, F), axis=0)
print(E, F, G, sep="\n")

tensor([[1, 2, 3]])
tensor([[4, 5, 6]])
tensor([[1, 2, 3],
        [4, 5, 6]])


In [39]:
# torch.stack always inserts a new dimension, so stacking the (1, 3) tensors E and F
# gives a 3-D result rather than the (2, 3) matrix that torch.cat produces
E, F = torch.tensor([[1,2,3]]), torch.tensor([[4,5,6]])
# New dimension at position 0 -> shape (2, 1, 3); at position 1 -> shape (1, 2, 3)
for new_axis in (0, 1):
    G = torch.stack((E, F), axis=new_axis)
    print(G)
    print(G.shape)

tensor([[[1, 2, 3]],

        [[4, 5, 6]]])
torch.Size([2, 1, 3])
tensor([[[1, 2, 3],
         [4, 5, 6]]])
torch.Size([1, 2, 3])


It is better to understand torch.stack from an algebraic point of view. E and F both have shape (1,3). 'torch.stack([E,F], axis=0)' adds a new dimension as the first dimension, so that the result has shape (2,1,3). Thus, G[0]==E and G[1]==F. 'torch.stack([E,F], axis=1)' adds a new dimension as the second dimension, so that the result has shape (1,2,3). In this case, G[:,0,:]==E and G[:,1,:]==F. See below.

In [83]:
# E and F are the (1, 3) tensors defined in the previous cells
# Stacking along a new first dimension: shape (2, 1, 3), so G[0] is E and G[1] is F
G = torch.stack([E,F], axis=0)
print( G[0]==E )
print( G[1]==F )

# Stacking along a new second dimension: shape (1, 2, 3), so E and F sit at index 0 and 1 of dimension 1
G = torch.stack([E,F], axis=1)
print( G[:,0,:]==E )
print( G[:,1,:]==F )

tensor([[True, True, True]])
tensor([[True, True, True]])
tensor([[True, True, True]])
tensor([[True, True, True]])


## Summary:
(1) Create tensors from list, numpy array.   
(2) Change datatype.   
(3) Transpose, reshape, squeeze.   
(4) Mathematical operations.    
(5) Chunk, split.   
(6) Concatenate, stack.    