In [3]:
import torch
print(torch.__version__)

2.6.0+cu118


In [4]:
if torch.cuda.is_available():
  print("cuda is available")
else:
  print("cuda is not available")

cuda is available


## Creating a Tensor

In [5]:
#using empty
a= torch.empty(2,3)

In [6]:
type(a)

torch.Tensor

In [7]:
#using zeros
torch.zeros(2,3)

tensor([[0., 0., 0.],
        [0., 0., 0.]])

In [8]:
#using ones
torch.ones(2,3)

tensor([[1., 1., 1.],
        [1., 1., 1.]])

In [9]:
#using rand -> gives random values between 0 and 1
torch.rand(2,3)

tensor([[0.3862, 0.3445, 0.9012],
        [0.4314, 0.3315, 0.0352]])

In [10]:
#each time torch.rand gives diff values so can use seed
torch.manual_seed(100)
torch.rand(2,3)

tensor([[0.1117, 0.8158, 0.2626],
        [0.4839, 0.6765, 0.7539]])

In [11]:
#using tensor
torch.tensor([
    [1,2,3],[4,5,6]
])

tensor([[1, 2, 3],
        [4, 5, 6]])

In [12]:
#arange
torch.arange(0,10) # from 0 upto 10 (excluding) with step 1
torch.arange(0,10,2)# from 0 upto 10 (excluding) with step 2

tensor([0, 2, 4, 6, 8])

In [13]:
#using linspace(linearly spaced)
torch.linspace(0,10,5) # from 0 to 10 give 5 linearly spaced values ie 1


tensor([ 0.0000,  2.5000,  5.0000,  7.5000, 10.0000])

In [14]:
torch.eye(5)   # identity matrix of rank 5

tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]])

In [15]:
torch.full((3,3),5) # (3,3) shape tensor with 5 as each element

tensor([[5, 5, 5],
        [5, 5, 5],
        [5, 5, 5]])

## Tensor shapes

In [16]:
x= torch.tensor([[1,2,3],[4,5,6],[7,8,9]])

In [17]:
x.shape

torch.Size([3, 3])

In [18]:
torch.empty_like(x)

tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])

In [19]:
torch.zeros_like(x)

tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])

In [20]:
torch.ones_like(x)

tensor([[1, 1, 1],
        [1, 1, 1],
        [1, 1, 1]])

In [21]:
#torch.rand_like(x)  -> gives error because it tries to give same datatype as x ie int but rand doesnt give int it gives float between 0 and 1 so we need to explicitly specify data type
torch.rand_like(x,dtype=torch.float32)

tensor([[0.2627, 0.0428, 0.2080],
        [0.1180, 0.1217, 0.7356],
        [0.7118, 0.7876, 0.4183]])

## Tensor Data Types

In [22]:
x.dtype

torch.int64

In [23]:
#assign data type
torch.tensor([[1.0,2.0,3],[4,5.0,6],[7,8,9]],dtype=torch.int32)

  torch.tensor([[1.0,2.0,3],[4,5.0,6],[7,8,9]],dtype=torch.int32)


tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]], dtype=torch.int32)

In [24]:
torch.tensor([[1,2,3],[4,5,6]],dtype=torch.float32)

tensor([[1., 2., 3.],
        [4., 5., 6.]])

In [25]:
#using to()
x.to(torch.float64)

tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]], dtype=torch.float64)

| **Data Type**             | **Dtype**         | **Description**                                                                                                                                                                |
|---------------------------|-------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| **32-bit Floating Point** | `torch.float32`   | Standard floating-point type used for most deep learning tasks. Provides a balance between precision and memory usage.                                                         |
| **64-bit Floating Point** | `torch.float64`   | Double-precision floating point. Useful for high-precision numerical tasks but uses more memory.                                                                               |
| **16-bit Floating Point** | `torch.float16`   | Half-precision floating point. Commonly used in mixed-precision training to reduce memory and computational overhead on modern GPUs.                                            |
| **BFloat16**              | `torch.bfloat16`  | Brain floating-point format with reduced precision compared to `float16`. Used in mixed-precision training, especially on TPUs.                                                |
| **8-bit Floating Point**  | `torch.float8`    | Ultra-low-precision floating point. Used for experimental applications and extreme memory-constrained environments (less common).                                               |
| **8-bit Integer**         | `torch.int8`      | 8-bit signed integer. Used for quantized models to save memory and computation in inference.                                                                                   |
| **16-bit Integer**        | `torch.int16`     | 16-bit signed integer. Useful for special numerical tasks requiring intermediate precision.                                                                                    |
| **32-bit Integer**        | `torch.int32`     | Standard signed integer type. Commonly used for indexing and general-purpose numerical tasks.                                                                                  |
| **64-bit Integer**        | `torch.int64`     | Long integer type. Often used for large indexing arrays or for tasks involving large numbers.                                                                                  |
| **8-bit Unsigned Integer**| `torch.uint8`     | 8-bit unsigned integer. Commonly used for image data (e.g., pixel values between 0 and 255).                                                                                    |
| **Boolean**               | `torch.bool`      | Boolean type, stores `True` or `False` values. Often used for masks in logical operations.                                                                                      |
| **Complex 64**            | `torch.complex64` | Complex number type with 32-bit real and 32-bit imaginary parts. Used for scientific and signal processing tasks.                                                               |
| **Complex 128**           | `torch.complex128`| Complex number type with 64-bit real and 64-bit imaginary parts. Offers higher precision but uses more memory.                                                                 |
| **Quantized Integer**     | `torch.qint8`     | Quantized signed 8-bit integer. Used in quantized models for efficient inference.                                                                                              |
| **Quantized Unsigned Integer** | `torch.quint8` | Quantized unsigned 8-bit integer. Often used for quantized tensors in image-related tasks.                                                                                     |

## Mathematical Operations


In [26]:
x= torch.rand(2,2)
x

tensor([[0.9014, 0.9969],
        [0.7565, 0.2239]])

In [27]:
#addition
x+2

#subtraction
x-2

#multiplication
x*2

#division
x/2

#int division
x //2

#mod
((x*100)//3)%2

#power
x**2

tensor([[0.8125, 0.9938],
        [0.5723, 0.0501]])

## Element Wise Operation

In [28]:
a = torch.rand(2,3)
b = torch.rand(2,3)
print(a,b)

tensor([[0.3023, 0.1784, 0.8238],
        [0.5557, 0.9770, 0.4440]]) tensor([[0.9478, 0.7445, 0.4892],
        [0.2426, 0.7003, 0.5277]])


In [29]:
#add
a+b

#sub
a-b

#mul
a*b

#div
a/b

#power
a**b

#mod
a % b

tensor([[0.3023, 0.1784, 0.3346],
        [0.0706, 0.2767, 0.4440]])

In [30]:
0.7909%0.1629

0.1393000000000001

In [31]:
c = torch.tensor([1,-2,3,-4])

In [32]:
#abs
torch.abs(c)

#neg
torch.neg(c)



tensor([-1,  2, -3,  4])

In [33]:
d = torch.tensor([1.9,2.3,3.7,4.4,5.5])

In [34]:
#round
torch.round(d)

tensor([2., 2., 4., 4., 6.])

In [35]:
#ceil
torch.ceil(d)

tensor([2., 3., 4., 5., 6.])

In [36]:
#floor
torch.floor(d)

tensor([1., 2., 3., 4., 5.])

In [37]:
#clamp brings values in range of [2,3] less than 2 => 2 and more than 3 =>3
torch.clamp(d,min=2,max=3)

tensor([2.0000, 2.3000, 3.0000, 3.0000, 3.0000])

## Reduction operation

In [38]:
e = torch.randint(low=0,high=10,size=(2,3),dtype=torch.float32) # gives random integers with low possible value=0 high poss val=10 with size (2,3)
e

tensor([[0., 0., 9.],
        [5., 7., 3.]])

In [39]:
#sum
torch.sum(e)

#sum along columns
torch.sum(e,dim=0)

#sum along rows
torch.sum(e,dim=1)

tensor([ 9., 15.])

In [40]:
#mean works for dtype float so change e dtype to float
torch.mean(e)
torch.mean(e,dim=0)
torch.mean(e,dim=1)

tensor([3., 5.])

In [41]:
#min and max
torch.min(e)
torch.min(e,dim=0)
torch.max(e)




tensor(9.)

In [42]:
#product
torch.prod(e)

tensor(0.)

In [43]:
#std
torch.std(e)
#variance
torch.var(e)


tensor(13.6000)

In [44]:
#argmax -> index of max number
torch.argmax(e)
torch.argmin(e)

tensor(0)

## Matrix operation

In [45]:
f = torch.randint(0,10,(2,3))
g = torch.randint(0,10,(3,2))
print(f)
print(g)

tensor([[9, 4, 0],
        [5, 7, 5]])
tensor([[9, 9],
        [7, 5],
        [9, 8]])


In [46]:
#matrix mul
torch.matmul(f,g)


tensor([[109, 101],
        [139, 120]])

In [47]:
vector1 = torch.tensor([1,2])
vector2 = torch.tensor([3,4])
#dot product
torch.dot(vector1,vector2)

tensor(11)

In [48]:
# transpose 0th dim with 1th dim given in syntax as it can work for n dim
torch.transpose(f,0,1)

tensor([[9, 5],
        [4, 7],
        [0, 5]])

In [49]:
h = torch.randint(size=(3,3), low=0, high=10, dtype=torch.float32)
h

tensor([[9., 7., 9.],
        [2., 6., 7.],
        [7., 8., 3.]])

In [50]:
#determinant
torch.det(h)

tensor(-275.)

In [51]:
#inverse
torch.inverse(h)

tensor([[ 0.1382, -0.1855,  0.0182],
        [-0.1564,  0.1309,  0.1636],
        [ 0.0945,  0.0836, -0.1455]])

## Comparison Operations


In [52]:
i = torch.randint(size=(2,3), low=0, high=10)
j = torch.randint(size=(2,3), low=0, high=10)

print(i)
print(j)

tensor([[6, 1, 5],
        [5, 0, 4]])
tensor([[3, 8, 8],
        [3, 3, 5]])


In [53]:
# greater than
i > j

# less than
i < j

# equal to
i == j

# not equal to
i != j
# greater than equal to

# less than equal to

tensor([[True, True, True],
        [True, True, True]])

## Special Functions

In [54]:
k = torch.randint(size=(2,3), low=0, high=10, dtype=torch.float32)
k

tensor([[0., 6., 4.],
        [0., 8., 4.]])

In [55]:
# log
torch.log(k)

tensor([[  -inf, 1.7918, 1.3863],
        [  -inf, 2.0794, 1.3863]])

In [56]:
# exp
torch.exp(k)

tensor([[1.0000e+00, 4.0343e+02, 5.4598e+01],
        [1.0000e+00, 2.9810e+03, 5.4598e+01]])

In [57]:
# sqrt
torch.sqrt(k)

tensor([[0.0000, 2.4495, 2.0000],
        [0.0000, 2.8284, 2.0000]])

In [58]:
# sigmoid
torch.sigmoid(k)

tensor([[0.5000, 0.9975, 0.9820],
        [0.5000, 0.9997, 0.9820]])

In [59]:
#softmax
#need to specify dim where we apply softmax eg dim=0 means column wise softmax eqn

torch.softmax(k,dim=0)

tensor([[0.5000, 0.1192, 0.5000],
        [0.5000, 0.8808, 0.5000]])

In [60]:
# relu
torch.relu(k)

tensor([[0., 6., 4.],
        [0., 8., 4.]])

## Inplace Operations


In [61]:
m = torch.rand(2,3)
n = torch.rand(2,3)

print(m)
print(n)

tensor([[0.9798, 0.5548, 0.6868],
        [0.4920, 0.0748, 0.9605]])
tensor([[0.3271, 0.0103, 0.9516],
        [0.2855, 0.2324, 0.9141]])


In [62]:
#for any operation between two tensors say m+n it occupies new memory for the result
#But if we want to place the result in the original tensor we can do so
m.add_(n)


tensor([[1.3069, 0.5650, 1.6384],
        [0.7776, 0.3072, 1.8746]])

In [63]:
m

tensor([[1.3069, 0.5650, 1.6384],
        [0.7776, 0.3072, 1.8746]])

In [64]:
m.relu_()

tensor([[1.3069, 0.5650, 1.6384],
        [0.7776, 0.3072, 1.8746]])

## Copying a tensor

In [65]:
a = torch.rand(2,3)
a

tensor([[0.7668, 0.1659, 0.4393],
        [0.2243, 0.8935, 0.0497]])

In [66]:
b=a

In [67]:
b

tensor([[0.7668, 0.1659, 0.4393],
        [0.2243, 0.8935, 0.0497]])

In [68]:
a[0][0] =0 # using assignment operation both are reference of memory location change in one cause change in other

In [69]:
a


tensor([[0.0000, 0.1659, 0.4393],
        [0.2243, 0.8935, 0.0497]])

In [70]:
b

tensor([[0.0000, 0.1659, 0.4393],
        [0.2243, 0.8935, 0.0497]])

In Python, id(a) returns the unique memory address (identity) of the object a in memory.

In [71]:
id(a)

2126089841136

In [72]:
id(b)

2126089841136

In [73]:
b = a.clone()

In [74]:
a[0][0] = 10


In [75]:
a


tensor([[10.0000,  0.1659,  0.4393],
        [ 0.2243,  0.8935,  0.0497]])

In [76]:
b

tensor([[0.0000, 0.1659, 0.4393],
        [0.2243, 0.8935, 0.0497]])

In [77]:
id(a)

2126089841136

In [78]:
id(b)

2126089862336

## Tensor Operations in GPU

upto now tensors were made in cpu although we have access of gpu

In [79]:
torch.cuda.is_available()

True

torch.device: This is a PyTorch function used to specify the device on which a tensor will be allocated (e.g., CPU or GPU).

'cuda': This refers to the GPU (specifically, CUDA-enabled GPU). CUDA is a parallel computing platform and application programming interface (API) model created by NVIDIA. It allows software to use the GPU for general-purpose processing, which is much faster than using the CPU for many tasks, especially in deep learning.

In [80]:
device =torch.device('cuda')

In [81]:
# creating a new tensor on gpu
torch.rand((2,3),device =device)

tensor([[0.3563, 0.0303, 0.7088],
        [0.2009, 0.0224, 0.9896]], device='cuda:0')

If you have a CUDA-enabled GPU and it is available, this will allocate the tensor on the GPU.
If not, it will default to the CPU.

In [82]:
#move an existing tensor to GPU
a= torch.rand(2,3)
a

tensor([[0.1780, 0.3011, 0.1893],
        [0.9186, 0.2131, 0.3957]])

In [83]:
b = a.to(device)

In [84]:
#in gpu for other operations as well
b+5

tensor([[5.1780, 5.3011, 5.1893],
        [5.9186, 5.2131, 5.3957]], device='cuda:0')

Analyse the difference in speed when we run operations in cpu and in gpu

In [85]:
import time

#define the size of the matrices
size = 10000

#create random matrices on CPU
matrix_cpu1= torch.randn(size,size) #randn : normal distribution
matrix_cpu2 =torch.randn(size,size)

#measure time on cpu
start_time = time.time()
result_cpu = torch.matmul(matrix_cpu1,matrix_cpu2)
cpu_time = time.time()- start_time

print(f"Time on CPU: {cpu_time:.4f} seconds")

# move matrices on GPU
matrix_gpu1 = matrix_cpu1.to('cuda')
matrix_gpu2 = matrix_cpu2.to('cuda')

#measure time on GPU
start_time = time.time()
result_gpu = torch.matmul(matrix_gpu1,matrix_gpu2)
gpu_time = time.time()- start_time

print(f"Time on GPU: {gpu_time:.4f} seconds")

#compare results
print("\nSpeedup (cpu/gpu):",cpu_time/gpu_time)

Time on CPU: 5.4139 seconds
Time on GPU: 4.5845 seconds

Speedup (cpu/gpu): 1.1809218061975846


## Reshaping Tensors

In [86]:
a= torch.ones(4,4)
a

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [87]:
#reshape ie product of original shapes must be eq to new one
a.reshape(2,2,2,2)

tensor([[[[1., 1.],
          [1., 1.]],

         [[1., 1.],
          [1., 1.]]],


        [[[1., 1.],
          [1., 1.]],

         [[1., 1.],
          [1., 1.]]]])

In [88]:
#flatten converts to 1d
a.flatten()

tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [89]:
b = torch.rand(2,3,4)
b

tensor([[[0.0141, 0.8903, 0.1649, 0.3162],
         [0.1331, 0.6763, 0.4254, 0.2375],
         [0.5430, 0.1174, 0.6532, 0.5115]],

        [[0.3094, 0.7907, 0.7351, 0.2693],
         [0.8009, 0.5839, 0.0061, 0.4671],
         [0.8243, 0.2861, 0.7459, 0.8487]]])

In [90]:
#permute ie change dim 2nd dim comes first 0th sec 1th third
b.permute(2,0,1).shape
#ie shape 4,2,3


torch.Size([4, 2, 3])

In [91]:
#unsqueeze -> add new dimension at specified index
c = torch.rand(2,3,4)
c


tensor([[[0.4059, 0.2883, 0.2220, 0.6423],
         [0.7098, 0.5644, 0.4549, 0.0729],
         [0.8535, 0.9793, 0.0611, 0.1162]],

        [[0.7666, 0.8801, 0.4203, 0.8186],
         [0.0463, 0.7306, 0.2009, 0.3265],
         [0.6684, 0.3509, 0.7772, 0.2353]]])

In [92]:
c.unsqueeze(0).shape

torch.Size([1, 2, 3, 4])

In [93]:
c.unsqueeze(0) # ie encloses by 1 [] ie add new dim say batch

tensor([[[[0.4059, 0.2883, 0.2220, 0.6423],
          [0.7098, 0.5644, 0.4549, 0.0729],
          [0.8535, 0.9793, 0.0611, 0.1162]],

         [[0.7666, 0.8801, 0.4203, 0.8186],
          [0.0463, 0.7306, 0.2009, 0.3265],
          [0.6684, 0.3509, 0.7772, 0.2353]]]])

In [94]:
c.unsqueeze(1).shape

torch.Size([2, 1, 3, 4])

In [95]:
c.unsqueeze(1) # ie encloses each 3*4 matrix by 1 []

tensor([[[[0.4059, 0.2883, 0.2220, 0.6423],
          [0.7098, 0.5644, 0.4549, 0.0729],
          [0.8535, 0.9793, 0.0611, 0.1162]]],


        [[[0.7666, 0.8801, 0.4203, 0.8186],
          [0.0463, 0.7306, 0.2009, 0.3265],
          [0.6684, 0.3509, 0.7772, 0.2353]]]])

In [96]:
c.unsqueeze(2).shape

torch.Size([2, 3, 1, 4])

In [97]:
c.unsqueeze(2) # ie encloses each [4] matrix by 1 []

tensor([[[[0.4059, 0.2883, 0.2220, 0.6423]],

         [[0.7098, 0.5644, 0.4549, 0.0729]],

         [[0.8535, 0.9793, 0.0611, 0.1162]]],


        [[[0.7666, 0.8801, 0.4203, 0.8186]],

         [[0.0463, 0.7306, 0.2009, 0.3265]],

         [[0.6684, 0.3509, 0.7772, 0.2353]]]])

In [98]:
c.unsqueeze(3).shape

torch.Size([2, 3, 4, 1])

In [99]:
c.unsqueeze(3) # ie encloses each element by 1 []

tensor([[[[0.4059],
          [0.2883],
          [0.2220],
          [0.6423]],

         [[0.7098],
          [0.5644],
          [0.4549],
          [0.0729]],

         [[0.8535],
          [0.9793],
          [0.0611],
          [0.1162]]],


        [[[0.7666],
          [0.8801],
          [0.4203],
          [0.8186]],

         [[0.0463],
          [0.7306],
          [0.2009],
          [0.3265]],

         [[0.6684],
          [0.3509],
          [0.7772],
          [0.2353]]]])

In [100]:
#squeeze -> opposite of unsqueeze 1 lai hataucha from shape
c = torch.rand(1,3,4)
c


tensor([[[0.4301, 0.4385, 0.7375, 0.5494],
         [0.7109, 0.0148, 0.7262, 0.4660],
         [0.0610, 0.7108, 0.5127, 0.8211]]])

In [101]:
c.squeeze(0).shape

torch.Size([3, 4])

In [102]:
c.squeeze(0)

tensor([[0.4301, 0.4385, 0.7375, 0.5494],
        [0.7109, 0.0148, 0.7262, 0.4660],
        [0.0610, 0.7108, 0.5127, 0.8211]])

In [103]:
c = torch.rand(3,1,4)
c

tensor([[[0.1323, 0.2127, 0.9785, 0.3362]],

        [[0.7165, 0.1591, 0.2860, 0.1827]],

        [[0.7421, 0.5736, 0.1984, 0.7901]]])

In [104]:
c.squeeze(1).shape

torch.Size([3, 4])

In [105]:
c.squeeze(1)

tensor([[0.1323, 0.2127, 0.9785, 0.3362],
        [0.7165, 0.1591, 0.2860, 0.1827],
        [0.7421, 0.5736, 0.1984, 0.7901]])

In [106]:
x = torch.tensor([1,2,3,4])
x


tensor([1, 2, 3, 4])

In [116]:
x.reshape(2,2)
x.view(2,2)   #both works

tensor([[1, 2],
        [3, 4]])

In [None]:
x.reshape(-1,2)
x.view(-1,2)  #both works 

tensor([[1, 2],
        [3, 4]])

In [120]:
x.view(2,-1)

tensor([[1, 2],
        [3, 4]])

## Numpy and Pytorch

In [108]:
import numpy as np

In [109]:
#tensor to numpy arr
a= torch.tensor([[1,2,3],[4,5,6]])
a

tensor([[1, 2, 3],
        [4, 5, 6]])

In [110]:
b = a.numpy()
b

array([[1, 2, 3],
       [4, 5, 6]], dtype=int64)

In [111]:
type(b)

numpy.ndarray

In [112]:
#tensor from numpy
c= torch.from_numpy(b)
c

tensor([[1, 2, 3],
        [4, 5, 6]])

In [113]:
type(c)

torch.Tensor