# Tensor

In [1]:
from importlib.metadata import version

pkgs = [
    "torch",
    "numpy"
]

for p in pkgs:
    print(f"{p} version: {version(p)}")

torch version: 2.6.0+cu124
numpy version: 1.26.4


In [2]:
import torch

## 1 Create a tensor

### 1.1 Create a tensor by PyTorch

#### 1.1.1 create a tensor by a constant

In [3]:
constant = torch.tensor(1)

print(constant)

tensor(1)


#### 1.1.2 create a tensor by list

In [4]:
x = torch.tensor([[1, 2, 3]])

print(x)

tensor([[1, 2, 3]])


In [5]:
y = torch.tensor([1])

print(y)

tensor([1])


In [6]:
z = torch.tensor([1, 2, 3])

print(z)

tensor([1, 2, 3])


#### 1.1.3 using torch.ones to create all one tensor

In [7]:
one = torch.ones((1, 2))

print(one)

tensor([[1., 1.]])


#### 1.1.4 using torch.zeros to create all zero tensor

In [8]:
zero = torch.zeros((2, 3, 4))

print(zero)

tensor([[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]],

        [[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]])


#### 1.1.5 using torch.empty create empty tensor

It actually use present the memory's value as the value of a tensor

In [9]:
empty = torch.empty((2, 3))

print(empty)

tensor([[2.9334e-11, 1.5554e-42, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00]])


#### 1.1.6 using torch.arange to create a sequence tensor

In [10]:
range_tensor = torch.arange(12)

print(range_tensor)

tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])


using `reshape` function to change its shape

In [11]:
_2_6 = torch.arange(12).reshape(2, 6)
_3_4 = torch.arange(12).reshape(3, 4)

print("(2, 6)", _2_6)
print("(3, 4)", _3_4)

(2, 6) tensor([[ 0,  1,  2,  3,  4,  5],
        [ 6,  7,  8,  9, 10, 11]])
(3, 4) tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])


In [12]:
try:
    err = torch.arange(12).reshape(2, 4)
    print(err)
except Exception as e:
    print("error:", e)

error: shape '[2, 4]' is invalid for input of size 12


In [13]:
try:
    err = torch.arange(12).reshape(2, 7)
    print(err)
except Exception as e:
    print("error:", e)

error: shape '[2, 7]' is invalid for input of size 12


#### 1.1.7 create tensor by torch.ones_like

In [14]:
ones_like = torch.ones_like(x)

print(ones_like)

tensor([[1, 1, 1]])


#### 1.1.8 create tensor by torch.rand_like

In [15]:
rand_like = torch.rand_like(x, dtype=torch.float16)

print(rand_like)

tensor([[0.1890, 0.8105, 0.4644]], dtype=torch.float16)


#### 1.1.9 create tensor by torch.rand

In [16]:
rand = torch.rand((2, 3))

print(rand)

tensor([[0.9292, 0.0358, 0.6974],
        [0.5548, 0.5014, 0.0052]])


#### 1.1.10 create tensor by torch.randn

In [17]:
randn = torch.randn((3, 4))

print(randn)

tensor([[-1.0550, -0.2906, -0.5086,  1.3968],
        [ 2.2449,  0.9106,  0.7223,  0.3703],
        [-0.7633,  0.9351,  1.2773, -0.3690]])


#### 1.1.11 create a tensor by comparison

In [18]:
cmp_tensor = torch.tensor([[1, 4, 6]])
bool_tensor_1 = x == x
bool_tensor_2 = x == cmp_tensor
print("x:\n", x)
print("bool_tensor_1:\n", bool_tensor_1)
print("cmp_tensor:\n", cmp_tensor)
print("bool_tensor_2:\n", bool_tensor_2)

x:
 tensor([[1, 2, 3]])
bool_tensor_1:
 tensor([[True, True, True]])
cmp_tensor:
 tensor([[1, 4, 6]])
bool_tensor_2:
 tensor([[ True, False, False]])


### 1.2 Create a tensor(ndarray) by NumPy

In [19]:
import numpy as np

In [20]:
nx = np.array([1, 2, 3])

print(nx)

[1 2 3]


In [21]:
ny = np.array([[1, 2, 3], [4, 5, 6]])

print(ny)

[[1 2 3]
 [4 5 6]]


In [22]:
nz = np.array((1, 2, 3))

print(nz)

[1 2 3]


In [23]:
nones = np.ones((2, 2, 3))

print(nones)

[[[1. 1. 1.]
  [1. 1. 1.]]

 [[1. 1. 1.]
  [1. 1. 1.]]]


In [24]:
nzeros = np.zeros((3, 2, 2))

print(nzeros)

[[[0. 0.]
  [0. 0.]]

 [[0. 0.]
  [0. 0.]]

 [[0. 0.]
  [0. 0.]]]


In [25]:
nempty = np.empty((2, 3))

print(nempty)

[[1.28244557e+131 2.41502121e+198 6.78875992e+199]
 [7.26612806e+223 3.58338804e+246 2.72657925e-310]]


In [26]:
narange = np.arange(12)

print(narange)

[ 0  1  2  3  4  5  6  7  8  9 10 11]


## 1.3 Convert ndarray to tensor

In [27]:
print("nrange.type:", type(narange))

nd2tensor = torch.from_numpy(narange)

print("nd2tensor.type:", type(nd2tensor))

nrange.type: <class 'numpy.ndarray'>
nd2tensor.type: <class 'torch.Tensor'>


### 1.4 create tensor defining its data type and device on

Here we create a tensor on GPU(cuda)

In [28]:
tensor_cuda = torch.tensor([1, 2, 3], dtype=torch.float16, device="cuda")

print(tensor_cuda)

tensor([1., 2., 3.], device='cuda:0', dtype=torch.float16)


Then, we create a tensor on CPU, it's also the default device

In [29]:
tensor_cpu = torch.tensor([4, 5, 6], dtype=torch.long, device="cpu")

print(f"tensor_cpu: {tensor_cpu}\n"
      f"tensor_cpu.dtype: {tensor_cpu.dtype}\n"
      f"tensor_cpu.device: {tensor_cpu.device}")

tensor_cpu: tensor([4, 5, 6])
tensor_cpu.dtype: torch.int64
tensor_cpu.device: cpu


## 2 Tensor dimension

### 2.1 0d tensor
We can also call it a scaler.

In [30]:
scaler = torch.tensor(3.14)

print("0d tensor:", scaler)

0d tensor: tensor(3.1400)


### 2.2 1d tensor
We can also call it vector.

In [31]:
vec = torch.tensor([1, 2, 3])

print("1d tensor", vec)

1d tensor tensor([1, 2, 3])


### 2.3 2d tensor
We can also call it matrix

In [32]:
matrix = torch.tensor([[1, 2, 3],
                       [4, 5, 6]])

print("2d tensor:", matrix)

2d tensor: tensor([[1, 2, 3],
        [4, 5, 6]])


### 2.4 nd tensor

Actually the dimension of a tensor is to look the depth of the brackets.
Like the 4d tensor here:
$$
\text{4dtensor} = [[[[1, 2, 3, 4]]]]
$$
It's actually have four depth of brackets.
And it can be expanded, but it's also the 4d tensor, for it's depth of brackets not changed.
$$
\text{4dtensor} = [[[[1, 2, 3, 4]]], \\
                  [[[5, 6, 7, 8]]]]
$$

## 3 Tensor operation

### 3.1 arithmetic operations

In [33]:
print("x:", x)
print("y:", y)
print("x + y:", x + y)

x: tensor([[1, 2, 3]])
y: tensor([1])
x + y: tensor([[2, 3, 4]])


Here exist the `broadcasting`. When resolve this two different dimension of tensors, PyTorch will expand `y` 1 ==> `y`[[1, 1, 1]] then add x
$$
y = 1 \ \text{then, expand} \\
y = [[1, 1, 1]] \\
x + y = \text{[[1, 2, 3]] + [[1, 1, 1]] = [[2, 3, 4]]}
$$

In [34]:
print("x:", x)
print("y:", y)

print("x * y:", x * y)

x: tensor([[1, 2, 3]])
y: tensor([1])
x * y: tensor([[1, 2, 3]])


Here, also exists the `broadcasting`.

In [35]:
print(x * torch.tensor([2]))

tensor([[2, 4, 6]])


In [36]:
print("y:", y)
print("x:", x)
print("y / x:", y / x)

y: tensor([1])
x: tensor([[1, 2, 3]])
y / x: tensor([[1.0000, 0.5000, 0.3333]])


In [37]:
print("y:", y)
print("x:", x)
print("y - x:", y - x)

y: tensor([1])
x: tensor([[1, 2, 3]])
y - x: tensor([[ 0, -1, -2]])


Attention: all these operations are bitwise.
And we can actually take another example.

In [38]:
print("x:\n", x)
print("torch.exp(x):\n", torch.exp(x))

x:
 tensor([[1, 2, 3]])
torch.exp(x):
 tensor([[ 2.7183,  7.3891, 20.0855]])


As we know $e = 2.7183...$. Here actually do this:
$$
\text{x} = [[1, 2, 3]] \\
\text{torch.exp(x)} = [[e^1, e^2, e^3]]
$$
It's also the bitwise operation.

#### 3.2 tensor multiply

#### 3.2.1 vec $\times$ vec

In [39]:
vec1 = torch.tensor([1, 3, 5])
vec2 = torch.tensor([2, 4, 6])

print("vec1:", vec1)
print("vec2:", vec2)
print("vec1 x vec2:", torch.dot(vec1, vec2))

vec1: tensor([1, 3, 5])
vec2: tensor([2, 4, 6])
vec1 x vec2: tensor(44)


It's a little bit differ from tensor $\times$ tensor. It finally will do a sum.
Let's have a look

In [40]:
tmp_vec = vec1 * vec2
print("vec1:\n", vec1)
print("vec2:\n", vec2)
print("tmp_vec:\n", tmp_vec)
print("torch.dot(vec1, vec2):\n", torch.dot(vec1, vec2))
print("tmp_vec.sum():\n", tmp_vec.sum())

vec1:
 tensor([1, 3, 5])
vec2:
 tensor([2, 4, 6])
tmp_vec:
 tensor([ 2, 12, 30])
torch.dot(vec1, vec2):
 tensor(44)
tmp_vec.sum():
 tensor(44)


#### 3.2.2 matrix $\times$ matrix

In [41]:
matrix1 = torch.tensor([[1, 3],
                        [5, 7]])
matrix2 = torch.tensor([[0, 2],
                        [4, 6]])
print("matrix1:\n", matrix)
print("matrix2:\n", matrix2)
print("matrix1 x matrix2:\n", torch.matmul(matrix1, matrix2))
print("matrix1 x matrix2:\n", matrix1 @ matrix2)

matrix1:
 tensor([[1, 2, 3],
        [4, 5, 6]])
matrix2:
 tensor([[0, 2],
        [4, 6]])
matrix1 x matrix2:
 tensor([[12, 20],
        [28, 52]])
matrix1 x matrix2:
 tensor([[12, 20],
        [28, 52]])


So, we will know that `torch.matuml` == `@`

### 3.3 tensor cat

In [42]:
print("x:", x)
print("y:", y)
try:
    print("torch.cat((x, y)):", torch.cat((x, y)))
except Exception as e:
    print("error:", e)

x: tensor([[1, 2, 3]])
y: tensor([1])
error: Tensors must have same number of dimensions: got 2 and 1


Cause here the y = 1 is a constant.

In [43]:
print("x:", x)
print("z:", z)
try:
    print("torch.cat((x, z)):", torch.cat((x, z)))
except Exception as e:
    print("error:", e)

x: tensor([[1, 2, 3]])
z: tensor([1, 2, 3])
error: Tensors must have same number of dimensions: got 2 and 1


Attention here. x = [[1, 2, 3]] and z = [1, 2, 3]. x has two brackets rather z has one.
And in PyTorch, we could check the dimension of a tensor by using `tensor.shape`

In [44]:
print("x.shape:", x.shape)
print("z.shape:", z.shape)

x.shape: torch.Size([1, 3])
z.shape: torch.Size([3])


From above two examples, we know we just can cat the two tensor that has the same dimension and either one can not be a constant.

In [45]:
correct_shape_tensor = torch.tensor([[4, 5, 6]])
print("x:", x)
print("correct_shape_tensor:", correct_shape_tensor)
print("torch.cat((x, correct_shape_tensor)):\n", torch.cat((x, correct_shape_tensor)))

x: tensor([[1, 2, 3]])
correct_shape_tensor: tensor([[4, 5, 6]])
torch.cat((x, correct_shape_tensor)):
 tensor([[1, 2, 3],
        [4, 5, 6]])


Let's check for their shape

In [46]:
print("x.shape:", x.shape)
print("correct_shape_tensor:", correct_shape_tensor.shape)

x.shape: torch.Size([1, 3])
correct_shape_tensor: torch.Size([1, 3])


And let's do another try.

In [47]:
err_shape_tensor = torch.tensor([[2, 3]])
print("x:", x)
print("err_shape_tensor:", err_shape_tensor)
try:
    print("torch.cat((x, err_shape_tensor)):", torch.cat((x, err_shape_tensor)))
except Exception as e:
    print("err:", e)

x: tensor([[1, 2, 3]])
err_shape_tensor: tensor([[2, 3]])
err: Sizes of tensors must match except in dimension 0. Expected size 3 but got size 2 for tensor number 1 in the list.


Here it notes us our dimension 0 is matched, but not for all. Let's have a look of them

In [48]:
print("x.shape:", x.shape)
print("err_shape_tensor:", err_shape_tensor.shape)

x.shape: torch.Size([1, 3])
err_shape_tensor: torch.Size([1, 2])


So, next time, you cat two tensor, you know what to do. Check their shape by `tensor.shape` and they must to be the same.

Oh, may you forget what torh.cat do. Let's have a retry.

In [49]:
simple_x = torch.tensor([1, 2, 3])
simple_y = torch.tensor([4, 5, 6])

print("simple_x:\n", simple_x)
print("simple_y:\n", simple_y)
print("torch.cat((simple_x, simple_y)):\n", torch.cat((simple_x, simple_y)))

simple_x:
 tensor([1, 2, 3])
simple_y:
 tensor([4, 5, 6])
torch.cat((simple_x, simple_y)):
 tensor([1, 2, 3, 4, 5, 6])


We could see, it's just concatenate them together on the same dimension. Just like what we do in Python list's append.
And let's have a look at a complex one. It's the same actually.

In [50]:
complex_x = torch.arange(24).reshape(2, 3, 4)
complex_y = torch.ones(24).reshape(2, 3, 4)

print("complex_x:\n", complex_x)
print("correct_y:\n", complex_y)
print("torch.cat((complex_x, complex_y)):\n", torch.cat((complex_x, complex_y)))

complex_x:
 tensor([[[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 8,  9, 10, 11]],

        [[12, 13, 14, 15],
         [16, 17, 18, 19],
         [20, 21, 22, 23]]])
correct_y:
 tensor([[[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]],

        [[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]])
torch.cat((complex_x, complex_y)):
 tensor([[[ 0.,  1.,  2.,  3.],
         [ 4.,  5.,  6.,  7.],
         [ 8.,  9., 10., 11.]],

        [[12., 13., 14., 15.],
         [16., 17., 18., 19.],
         [20., 21., 22., 23.]],

        [[ 1.,  1.,  1.,  1.],
         [ 1.,  1.,  1.,  1.],
         [ 1.,  1.,  1.,  1.]],

        [[ 1.,  1.,  1.,  1.],
         [ 1.,  1.,  1.,  1.],
         [ 1.,  1.,  1.,  1.]]])


Have a look at their shape.

In [51]:
print("complex_x.shape:\n", complex_x.shape)
print("correct_y.shape:\n", complex_y.shape)
print("torch.cat((complex_x, complex_y)).shape:\n", torch.cat((complex_x, complex_y)).shape)

complex_x.shape:
 torch.Size([2, 3, 4])
correct_y.shape:
 torch.Size([2, 3, 4])
torch.cat((complex_x, complex_y)).shape:
 torch.Size([4, 3, 4])


We can see, they have the same dimension like [x, y, z], but the dimension value is a little bit differed.
<br>
In other words, after `torch.cat`, the brackets depth is not changed.
<br>
In my understanding, the two tensor are like lines, and you just lengthen them in their length. 

If you care about the num of brackets, and you will see the last output like [[[x, y]]].

### 3.4 tensor stack

This is another operation we usually do in LLM.
Let's deep it.

In [52]:
stack_on = torch.tensor([1, 2, 3])
stack_below = torch.tensor([4, 5, 6])

print("stack_on:\n", stack_on)
print("stack_below:\n", stack_below)
print("torh.cat((stack_on, stack_below)):\n", torch.stack((stack_on, stack_below)))

stack_on:
 tensor([1, 2, 3])
stack_below:
 tensor([4, 5, 6])
torh.cat((stack_on, stack_below)):
 tensor([[1, 2, 3],
        [4, 5, 6]])


And we check for their shape.

In [53]:
print("stack_on.shape:\n", stack_on.shape)
print("stack_below:\n", stack_below.shape)
print("torch.stack((stack_on, stack_below)):\n", torch.stack((stack_on, stack_below)).shape)

stack_on.shape:
 torch.Size([3])
stack_below:
 torch.Size([3])
torch.stack((stack_on, stack_below)):
 torch.Size([2, 3])


And you will see, actually, their dimension changed. It's like stack the building blocks.
> ⚠
the tensors want to be stacked still must have the same dimension

### 3.5 item()

Especially, when a tensor is a constant or has only one element, we can use item() function to abtain its value, may int or float

Let's have a look

#### 3.5.1 convert constant tensor to value

In [54]:
int_constant = torch.tensor(3)

integer = int_constant.item()

print(f"integer: {integer}\n"
      f"integer.type {type(integer)}")

integer: 3
integer.type <class 'int'>


In [55]:
float_constant = torch.tensor(3.14)

float = float_constant.item()

print(f"float: {float}\n"
      f"float.type: {type(float)}")

float: 3.140000104904175
float.type: <class 'float'>


#### 3.5.2 conver single element tensor to value

In [56]:
elem_tensor = torch.tensor([8.9])

elem = elem_tensor.item()

print(f"elem: {elem}\n"
      f"elem.type: {type(elem)}")

elem: 8.899999618530273
elem.type: <class 'float'>


In [57]:
_elem_tensor = torch.tensor([[5.2]])

_elem = _elem_tensor.item()

print(f"_elem: {_elem}\n"
      f"_elem.type: {type(_elem)}")

_elem: 5.199999809265137
_elem.type: <class 'float'>


Here we note that even it's brackets $depth = 2$, but we can also convert it for it's single element
<br>
And we can have another try

In [58]:
ant_elem_tensor = torch.tensor([[[[[13.14]]]]])

ant_elem = ant_elem_tensor.item()

print(f"ant_elem: {ant_elem}\n"
      f"ant_elem.type: {type(ant_elem)}")

ant_elem: 13.140000343322754
ant_elem.type: <class 'float'>


### 3.6 view()

In [59]:
orig = torch.arange(12)
print("orig:", orig)

orig: tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])


Let's apply `view()` to it.

In [60]:
view = orig.view(3, 4)
print("view:", view)

view: tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])


Actually it's the same with `reshape()`

In [61]:
reshape = orig.reshape(3, 4)
print("reshape:", reshape)
print("view == reshape:", view == reshape)

reshape: tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])
view == reshape: tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])
