In [1]:
import torch
import numpy as np

In [2]:
torch.__version__

'2.9.1+cu126'

## Introduction to tensors

Tensor is a way to represent data in numerical form 

Tensor is an n-dimensional array of numbers(multidimentional array) 

* Scalar - a single number 
* Vector - a number which have magnitude and direction 
* Matrix - 2D array of numbers

In [3]:
#Scalar
scalar = torch.tensor(7)
scalar

tensor(7)

In [4]:
# ndim will let you know the dimension of the tensor 
scalar.ndim

0

In [5]:
# Get tensor back as python int
scalar.item()

7

In [6]:
#vecotor 

vector = torch.tensor([5,5])
vector

tensor([5, 5])

In [7]:
# vector is 1-dimensional tensor
vector.ndim

1

In [8]:
# Matrix 

matrix = torch.tensor([[7,8],
                      [9,10]])
matrix


tensor([[ 7,  8],
        [ 9, 10]])

In [9]:
# matrix is 2-dimensional tensor
matrix.ndim

2

In [10]:
# This is how we can index into the matrix
matrix[1]

tensor([ 9, 10])

In [11]:
# lets see the shape of the matrix 
matrix.shape

torch.Size([2, 2])

In [12]:
# Tensor

Tensor = torch.tensor([[[1,2,3],
[4,5,6],
[7,8,9]]]) 
Tensor

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])

In [13]:
# Tensor is 3-dimensional tensor
# 1 is the number of samples
# 3 is the number of rows
# 3 is the number of columns
Tensor.shape

torch.Size([1, 3, 3])

In [14]:
Tensor.ndim

3

In [15]:
Tensor[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

##### Tensor can be created from numpy array 

In [16]:
data = [[1,2],[2,3]]
tensor_data = torch.tensor(data)

In [17]:
tensor_data , tensor_data.ndim

(tensor([[1, 2],
         [2, 3]]),
 2)

In [18]:
# from nparray

data_np = np.array(data)
tensor_np = torch.from_numpy(data_np)

In [19]:
tensor_np , tensor_np.ndim

(tensor([[1, 2],
         [2, 3]]),
 2)

##### Tesnor are similar to np array except that tensor can run on gpu's and hardware accelerator
tensor and numpy can share same underlying memory, eliminating the need to copy data 

#### Bridge with numpy

Tensor on the cpu and numpy array can share their underlying memory location , and changing one will change the other

In [20]:
t = torch.ones(5)
print(f"t:{t}")

n = t.numpy()
print(f"n:{n}")

t:tensor([1., 1., 1., 1., 1.])
n:[1. 1. 1. 1. 1.]


In [21]:
# if i make any changes in the tensor , it will also change in the numpy array

t.add_(1)

tensor([2., 2., 2., 2., 2.])

In [22]:
print(f"tensor:{t}")
print(f"numpy_array:{n}") 

tensor:tensor([2., 2., 2., 2., 2.])
numpy_array:[2. 2. 2. 2. 2.]


In [23]:
#vice versa 

n = np.ones(5)
t = torch.from_numpy(n)
print(f"t:{t}")
print(f"n:{n}")

# if i make any changes in the numpy array , it will also change in the tensor

n = np.ones(5)
t = torch.from_numpy(n)
print(f"t:{t}")
print(f"n:{n}")




t:tensor([1., 1., 1., 1., 1.], dtype=torch.float64)
n:[1. 1. 1. 1. 1.]
t:tensor([1., 1., 1., 1., 1.], dtype=torch.float64)
n:[1. 1. 1. 1. 1.]


### Random Tensor

Random tensor - Start with random number 

Why we need random tensor?
In neural network , tesnor start with random numbers because ->

* If all the weights start at zero , neurons would update identically

* Random initialization breaks symmentry so each neuron can learn different patterns

* The distribution matters , to control initial scale 

So basically neural netowork start with random tensor and adujest its value to better represent the data - it is important so the nn will not take any patterns to get better response 

In [24]:
# This will create a random tensor - matrix with random numbers
random_tensor = torch.rand(1,3,4)

In [25]:
random_tensor

tensor([[[0.6588, 0.1531, 0.0935, 0.0121],
         [0.7527, 0.2228, 0.5265, 0.5077],
         [0.6082, 0.7200, 0.7452, 0.7984]]])

In [26]:
random_tensor.size()

torch.Size([1, 3, 4])

#### Representing image in tesnor

In [27]:
random_image_tensor = torch.rand(size=(224,224,3))
random_image_tensor.shape , random_image_tensor.ndim 

(torch.Size([224, 224, 3]), 3)

#### Creating a range of tensors

In [28]:
one_to_ten = torch.arange(start=1 , end=11 , step=1)
one_to_ten

tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [29]:
ten_zeros = torch.zeros_like(input=one_to_ten)
ten_zeros

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

#### Tensor Datatypes

In [30]:
# float 32 tensor 

float_32_tensor = torch.tensor([3.0 , 6.0,9.0],
dtype = None, # this is the datatype of the tensor
device = None, # this is the device on which the tensor is stored
requires_grad=False # this is the gradient of the tensor
)

float_32_tensor

# default data type of tensor are float32

tensor([3., 6., 9.])

In [31]:
float_32_tensor.dtype

torch.float32

In [32]:
float_16_tensor = float_32_tensor.type(torch.float16)
float_16_tensor , float_16_tensor.dtype

(tensor([3., 6., 9.], dtype=torch.float16), torch.float16)

#### Issue we will get into when playing with tensors

* Tensor not right devicetype 
* Tensor not right shape 
* Tensor not on the right device

In [33]:
# Getting information from tensor 

tensor = torch.rand(3,4).type(torch.float16)
tensor

tensor([[0.3855, 0.1351, 0.5190, 0.7305],
        [0.0706, 0.1332, 0.2505, 0.1698],
        [0.7822, 0.1704, 0.1965, 0.1451]], dtype=torch.float16)

In [34]:
# get the datatype of the tensor
tensor.dtype

torch.float16

In [35]:
# get the shape of the tensor
tensor.shape , tensor.size()

(torch.Size([3, 4]), torch.Size([3, 4]))

In [36]:
# get the device of the tensor
tensor.device # by default tensor is on cpu

device(type='cpu')

##### Manipulating Tensors (tensor operations)

Tensor operation include :
* addition 
* subtraction
* Multiplication ( element-wise )
* Division 
* Matrix multiplication

In [37]:
tensor = torch.tensor([1,2,3])
tensor + 10

tensor([11, 12, 13])

In [38]:
tensor * 10

tensor([10, 20, 30])

In [39]:
tensor - 10

tensor([-9, -8, -7])

##### Pytorch in-build function

In [40]:
tensor.mul(10) , tensor.add(10) , tensor.sub(10)
# same same but different
torch.mul(tensor,10) , torch.add(tensor,10) , torch.sub(tensor,10)

(tensor([10, 20, 30]), tensor([11, 12, 13]), tensor([-9, -8, -7]))

##### MATRIX multiplication

* Element wise 
* Matrix multiplication (dot product)

Error we might face:

1. The **inner-diminision** must match:
* `(3,2) * (3,2) won't work 
* `(3,2) * (2,3) will work
* `(2,3) * (3,2) will work

Just basic matrix multiplication rule


In [41]:
# Element wise 
print(tensor , "*" , tensor)
print(f"Equals : {tensor * tensor}")

tensor([1, 2, 3]) * tensor([1, 2, 3])
Equals : tensor([1, 4, 9])


In [42]:
# MATRIX multiplication

torch.matmul(tensor, tensor)

tensor(14)

##### Brakedown why matmul(the native torch functions) is faster then writing your own multiplication loop

In [43]:
%%time
value = 0 
for i in range(len(tensor)):
    value += tensor[i] * tensor[i]

value

CPU times: user 204 μs, sys: 37 μs, total: 241 μs
Wall time: 207 μs


tensor(14)

In [44]:
%%time 

torch.matmul(tensor, tensor)

CPU times: user 107 μs, sys: 19 μs, total: 126 μs
Wall time: 163 μs


tensor(14)

#### Shape errors

In [45]:
torch.matmul(torch.rand(3,2),torch.rand(2,3))

# torch.mm(x,y)

tensor([[0.8486, 1.0025, 1.2685],
        [0.7334, 0.5931, 0.8644],
        [0.9043, 1.0605, 1.3451]])

In [46]:
torch.matmul(torch.rand(2,3),torch.rand(3,2))

tensor([[0.8037, 1.4278],
        [0.8294, 1.4316]])

In [47]:
torch.matmul(torch.rand(3,2),torch.rand(3,2)) # shape error 

RuntimeError: mat1 and mat2 shapes cannot be multiplied (3x2 and 3x2)

#### Shapes for matrix multiplication

In [48]:
tensor_a = torch.tensor([[1,2],
                         [3,4],
                         [5,8]])
tensor_b = torch.tensor([[5,6],
                         [7,4],
                         [9,10]])

tensor_a.shape , tensor_b.shape

# this can't be multiplied because the inner dimension of tensor_a is not equal to the outer dimension of tensor_b



(torch.Size([3, 2]), torch.Size([3, 2]))

#### to fix our tensor shape issue we can manipulate the shape of our tensor using a transpose 

A transpose switches the axes or dimensions of a tensor 

In [49]:
tensor_b.T , tensor_b.T.shape # this will switch the shape of the tensor_b from (3,2) to (2,3)

# T stands for transpose

(tensor([[ 5,  7,  9],
         [ 6,  4, 10]]),
 torch.Size([2, 3]))

In [50]:
torch.matmul(tensor_a,tensor_b.T)

tensor([[ 17,  15,  29],
        [ 39,  37,  67],
        [ 73,  67, 125]])

##### Finding the min , max , mean ,sum etc (tensor aggregation)

In [51]:
tensor = torch.rand(3,4)
tensor , tensor.dtype

(tensor([[0.8510, 0.8993, 0.8714, 0.0188],
         [0.6331, 0.9133, 0.3725, 0.7137],
         [0.9646, 0.1863, 0.9004, 0.0010]]),
 torch.float32)

In [52]:
tensor.min() , tensor.max() , tensor.sum()

(tensor(0.0010), tensor(0.9646), tensor(7.3254))

In [53]:
torch.mean(tensor)

tensor(0.6105)

In [54]:
tensor.argmin() , tensor.argmax() # this will return the index of the min and max value

(tensor(11), tensor(8))

#### Reshaping , stacking , squeezing and unsqueezing tensors

* Reshaping - reshapes an input tensor to a defined shape
* View - Return a view of an input tensor of certain shape but keep the same memory as the original tensor

* stacking - combine multiple tensors on top of each other (vstack) or side by side (hstack)

* squeeze - remove a 1 dimension to a target tensor 
* unsqueeze - add a 1 dimension to a target tensor
* permute - return a view of the input with dimensions permuted(swapped in a certain way)

In [55]:
import torch
x = torch.arange(1.,10.)
x.shape , x

(torch.Size([9]), tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]))

In [56]:
# Add extra dimension 
x_reshaped = x.reshape(9,1)
x_reshaped , x_reshaped.shape

(tensor([[1.],
         [2.],
         [3.],
         [4.],
         [5.],
         [6.],
         [7.],
         [8.],
         [9.]]),
 torch.Size([9, 1]))

In [57]:
# change the view 
x_view = x.view(1,9)
x_view , x_view.shape

# view store the memory of the original tensor , so changes in one will change the other


(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [58]:
x_view[:, 0] = 5
x_view , x_reshaped


(tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]]),
 tensor([[5.],
         [2.],
         [3.],
         [4.],
         [5.],
         [6.],
         [7.],
         [8.],
         [9.]]))

In [59]:
# Stack tensor on top of each other

x_stacked = torch.stack([x,x,x,x],dim=1)
x_stacked

tensor([[5., 5., 5., 5.],
        [2., 2., 2., 2.],
        [3., 3., 3., 3.],
        [4., 4., 4., 4.],
        [5., 5., 5., 5.],
        [6., 6., 6., 6.],
        [7., 7., 7., 7.],
        [8., 8., 8., 8.],
        [9., 9., 9., 9.]])

In [60]:
x_reshaped

tensor([[5.],
        [2.],
        [3.],
        [4.],
        [5.],
        [6.],
        [7.],
        [8.],
        [9.]])

In [61]:
# squeeze and unsqueeze 
# squeeze remove all the 1 dimensions from the tensor

x_reshaped.squeeze() , x_reshaped.squeeze().shape

(tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [62]:
# unsqueeze add a 1 dimension to the tensor
x_reshaped.unsqueeze(dim=0) , x_reshaped.unsqueeze(dim=0).shape

(tensor([[[5.],
          [2.],
          [3.],
          [4.],
          [5.],
          [6.],
          [7.],
          [8.],
          [9.]]]),
 torch.Size([1, 9, 1]))

In [63]:
# Permutle -> rearrange the dimensions of the tensor in a specified order
x_original = torch.rand(224,224,3) # height , width , color channels

x_original , x_original.shape


(tensor([[[0.5864, 0.1159, 0.1209],
          [0.4893, 0.5107, 0.4268],
          [0.0140, 0.9298, 0.1478],
          ...,
          [0.4663, 0.7592, 0.2700],
          [0.7011, 0.9488, 0.2598],
          [0.4651, 0.7926, 0.4243]],
 
         [[0.7465, 0.8556, 0.2061],
          [0.2280, 0.2457, 0.6207],
          [0.3741, 0.3553, 0.5997],
          ...,
          [0.5715, 0.6976, 0.3459],
          [0.5524, 0.0439, 0.9434],
          [0.9799, 0.3324, 0.8400]],
 
         [[0.4248, 0.4027, 0.1185],
          [0.6528, 0.9066, 0.0532],
          [0.7773, 0.9596, 0.6923],
          ...,
          [0.0791, 0.2030, 0.3756],
          [0.2428, 0.2466, 0.6361],
          [0.5729, 0.2837, 0.5770]],
 
         ...,
 
         [[0.3924, 0.0453, 0.8676],
          [0.9977, 0.0010, 0.2275],
          [0.5383, 0.6096, 0.8787],
          ...,
          [0.0210, 0.1168, 0.2553],
          [0.4079, 0.6847, 0.8344],
          [0.1823, 0.4781, 0.8577]],
 
         [[0.2501, 0.9647, 0.3588],
          [0

In [64]:
# permute the original tensor to rearrange the dimensions or target tensor in a pecifed order 

x_permuted = x_original.permute(2,0,1) # color channels , height , width
x_permuted , x_permuted.shape

(tensor([[[0.5864, 0.4893, 0.0140,  ..., 0.4663, 0.7011, 0.4651],
          [0.7465, 0.2280, 0.3741,  ..., 0.5715, 0.5524, 0.9799],
          [0.4248, 0.6528, 0.7773,  ..., 0.0791, 0.2428, 0.5729],
          ...,
          [0.3924, 0.9977, 0.5383,  ..., 0.0210, 0.4079, 0.1823],
          [0.2501, 0.0847, 0.8980,  ..., 0.8656, 0.2408, 0.1168],
          [0.1638, 0.4686, 0.7663,  ..., 0.3157, 0.9571, 0.3179]],
 
         [[0.1159, 0.5107, 0.9298,  ..., 0.7592, 0.9488, 0.7926],
          [0.8556, 0.2457, 0.3553,  ..., 0.6976, 0.0439, 0.3324],
          [0.4027, 0.9066, 0.9596,  ..., 0.2030, 0.2466, 0.2837],
          ...,
          [0.0453, 0.0010, 0.6096,  ..., 0.1168, 0.6847, 0.4781],
          [0.9647, 0.0810, 0.2408,  ..., 0.2535, 0.6694, 0.2692],
          [0.5554, 0.2094, 0.4000,  ..., 0.1544, 0.4968, 0.9970]],
 
         [[0.1209, 0.4268, 0.1478,  ..., 0.2700, 0.2598, 0.4243],
          [0.2061, 0.6207, 0.5997,  ..., 0.3459, 0.9434, 0.8400],
          [0.1185, 0.0532, 0.6923,  ...,

##### Indexing (select data from tensor)
Indexing with pytorch is similar to indexing with numpy

In [65]:
import torch 

x = torch.arange(1,10).reshape(1,3,3) # 1 sample , 3 rows , 3 columns
x , x.shape


(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 torch.Size([1, 3, 3]))

In [66]:
# Indexing on 0th dimension
# dim -0
x[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [67]:
# Indexing on the middle braket dim -1 
x[0][0]

tensor([1, 2, 3])

In [69]:
# indexing on last dimension dim-2
x[0][0][2]

tensor(3)

In [70]:
# we can also use ":" to get all the elements in that dimension
# ":" is used to get all the elements in that dimension
x[:,:,2]


tensor([[3, 6, 9]])

In [71]:
# Get index 0th index of 0th dimension and all values of 2nd dim

x[0,0,:]

tensor([1, 2, 3])

#### PyTorch and NumPy

* Data in Numpy , want in pytorch tensor -> torch.from_numpy(ndarray)
* Pytorch tensor -> NumPy -> `torch.Tensor.numpy()

In [72]:
# Numpy array to tensor 

import torch
import numpy as np

In [None]:
array = np.arange(1.0,8.0)
tensor = torch.from_numpy(array)
tensor , array

# Numpy default datatype is float 64
# When converting from numpy -> Pytroch ,pytorch reflects numpy's default datatype of float 64 unless specifeid otherwise


(tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64),
 array([1., 2., 3., 4., 5., 6., 7.]))

In [76]:
tensor.dtype

torch.float64

In [None]:
torch.float32 # change the datatype to float32

torch.float32

In [78]:
# chaning the value of array

array = array + 1
array ,tensor

(array([2., 3., 4., 5., 6., 7., 8.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [79]:
tensor = torch.ones(7)
numpy_tensor = tensor.numpy()
tensor , numpy_tensor

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

In [80]:
numpy_tensor.dtype

dtype('float32')

#### Reproducbility
Neural netowork start with random number - > tensor operations -> update numbers to try and make thn better representation of the data-> again -> again-> again

* To reduce randomness in nn and pytorch comes the concept of a random seed

Essentially what the random seed does is "flavour" the randomness

In [None]:
torch.rand(3,4) # if you run this again and again you will get different results

tensor([[0.6879, 0.8463, 0.3585, 0.4228],
        [0.5018, 0.8854, 0.9979, 0.3054],
        [0.9993, 0.8043, 0.9137, 0.7813]])

In [None]:
# to make the result reproducible we can set the seed
torch.manual_seed(42)
torch.rand(3,4)

tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])

https://docs.pytorch.org/docs/stable/notes/randomness.html

#### Accessing a GPU

Running tensor on GPU for faster computation

In [None]:
### How to check if a GPU is available
torch.cuda.is_available()

True

In [90]:
#use gpu if available
device = "cuda" if torch.cuda.is_available() else "cpu"
device
# move tensor to the gpu if available
tensor = tensor.to(device)
tensor.device


device(type='cuda', index=0)

In [91]:
!nvidia-smi

Tue Nov 25 19:34:19 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 580.105.08             Driver Version: 580.105.08     CUDA Version: 13.0     |
+-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 3050 ...    Off |   00000000:01:00.0 Off |                  N/A |
| N/A   49C    P8              7W /   60W |      82MiB /   4096MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+

+----------------------------------------------

In [None]:
# Putting tensor on the GPU

tensor = torch.tensor([1,2,3])

# tensor not on gpu 

tensor.device

device(type='cuda', index=0)

In [95]:
tensor_on_gpu = tensor.to(device)
tensor_on_gpu.device

device(type='cuda', index=0)

In [None]:
#### moving tesnor back to cpu 
# IF - tensor is on gpu we can not use it with numpy
tensor_on_cpu = tensor_on_gpu.to(device="cpu")
tensor_on_cpu.device

device(type='cpu')

In [102]:
# tensor on gpu can not be converted to numpy
tensor_on_gpu.numpy()

TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

In [103]:
tensor_on_cpu.numpy()

array([1, 2, 3])