In [65]:
import torch

In [66]:
# Is pytorch connected to GPU?
torch.cuda.is_available()

True

In [67]:
# If you have cuda it will run on cuda else cpu.
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", device)

Device: cuda


In [68]:
# what is the name of the gpu?
gpu_name = torch.cuda.get_device_name(0)
print(gpu_name)

NVIDIA GeForce RTX 3050 Laptop GPU


In [69]:
# Creating a tensor manually!
list_tensor = [[1, 2, 3, 4], [4, 5, 6, 7]]
tensor = torch.tensor(list_tensor)

print(tensor)

tensor([[1, 2, 3, 4],
        [4, 5, 6, 7]])


In [70]:
# How do we specify the type?
list_tensor = [[1, 2, 3, 4], [4, 5, 6, 7]]
tensor = torch.tensor(list_tensor, 
                      dtype = torch.float32)

print(tensor)

tensor([[1., 2., 3., 4.],
        [4., 5., 6., 7.]])


In [71]:
# How do we put it in GPU?
list_tensor = [[1, 2, 3, 4], [4, 5, 6, 7]]
tensor = torch.tensor(list_tensor, 
                      dtype = torch.float32,
                      device = "cuda")

print(tensor)

tensor([[1., 2., 3., 4.],
        [4., 5., 6., 7.]], device='cuda:0')


In [72]:
# What if we do not need gradient?
# How do we put it in GPU?
list_tensor = [[1, 2, 3, 4], [4, 5, 6, 7]]
tensor = torch.tensor(list_tensor, 
                      dtype = torch.float32,
                      device = "cuda",
                      requires_grad = False)

print(tensor)

tensor([[1., 2., 3., 4.],
        [4., 5., 6., 7.]], device='cuda:0')


In [73]:
# how do you know the dtype?
print("Data type:", tensor.dtype)

# to know the device
print("Device:", tensor.device)

# How do you know the shape?
print("Shape:", tensor.shape)

Data type: torch.float32
Device: cuda:0
Shape: torch.Size([2, 4])


In [74]:
# Uninitialized data. Whatever is in the memory is filled inside the tensor.
uninit_tensor = torch.empty(size = (5, 5))
print(uninit_tensor)

tensor([[1.4013e-45, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00, 4.9256e+10, 3.0892e-41],
        [4.3458e+09, 3.0890e-41, 5.4482e+14, 4.5623e-41, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00]])


In [75]:
# Tensor filled with zeros
zeros_tensor = torch.zeros(size = (3, 3))
print(zeros_tensor)

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])


In [76]:
# Tensor filled with ones
zeros_tensor = torch.ones(size = (3, 3))
print(zeros_tensor)

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])


In [77]:
# Initialize randomly using uniform dist
tensor = torch.rand(size = (3, 3))
print(tensor)

tensor([[0.5315, 0.9200, 0.7290],
        [0.2167, 0.9279, 0.1685],
        [0.1136, 0.6284, 0.1838]])


In [78]:
# Identity matrix
identity_tensor = torch.eye(n = 3, m = 3)
print(identity_tensor)

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])


In [79]:
# Similar to list(range(start, end, step)) in python
ranges = torch.arange(start = 0, end = 5, step = 1)
print(ranges)

tensor([0, 1, 2, 3, 4])


In [80]:
# Creates range of steps(10) number of bins
x = torch.linspace(start = 0.1, end = 1, steps = 10)
print(x)

tensor([0.1000, 0.2000, 0.3000, 0.4000, 0.5000, 0.6000, 0.7000, 0.8000, 0.9000,
        1.0000])


In [81]:
# if we want to preseve the values across the diagonal then we can
# pass the tensor
tensor = torch.diag(torch.ones(5))
print(tensor)

tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]])


In [82]:
# INT16 tensor
print(tensor.short())

tensor([[1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0],
        [0, 0, 1, 0, 0],
        [0, 0, 0, 1, 0],
        [0, 0, 0, 0, 1]], dtype=torch.int16)


In [83]:
# INT64 tensor
print(tensor.long())

tensor([[1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0],
        [0, 0, 1, 0, 0],
        [0, 0, 0, 1, 0],
        [0, 0, 0, 0, 1]])


In [84]:
# FLOAT16
print(tensor.half())

tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]], dtype=torch.float16)


In [85]:
# FLOAT64
print(tensor.double())

tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]], dtype=torch.float64)


In [86]:
# Work with numpy 
import numpy as np

# numpy array
numpy_tensor = np.zeros((5 , 5))

# convert numpy array to torch.tensor
tensor = torch.from_numpy(numpy_tensor)
print(tensor)

tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]], dtype=torch.float64)


In [87]:
#### Bacis Linear algebra OPS

In [88]:
# Dot product using python list
def dot_product(a, b):
    return sum([aval * bval for aval, bval in zip(a, b)])


# Matrix multiplication in using python list
def matmul(a, b):
    # resultant matrix
    res = []
    # Looping through the rows of a
    for i in range(len(a)):
        # appending one row to the resultant matrix
        res.append([])
        # looping through the rows of b
        for j in range(len(b)):
            # Performing element wise sum, also called dot product
            element_wise_sum = dot_product(a[i], b[j])
            # appending to row "i" of the resultant matrix
            res[i].append(element_wise_sum)
    return res

In [89]:
a = [
    [1, 2, 3],
    [4, 5, 6]
]

b = [
    [7, 8, 9],
    [10, 11, 12]
]

matmul(a, b)

[[50, 68], [122, 167]]

In [90]:
from math import sqrt

# Distance from origin
def distance_from_origin(w):
    return sqrt(sum([wval ** 2 for wval in w]))

a = [1, 2, 3]
distance_from_origin(a)

3.7416573867739413

In [91]:
# distance between two vectors
def distance_between_two_vectors(a, b):
    return sqrt(sum([(aval - bval) ** 2 for aval, bval in zip(a, b)]))

a = [1, 2, 3]
b = [4, 5, 6]
distance_between_two_vectors(a, b)

5.196152422706632

In [92]:
# vector addition using torch
# can be used for residual connections.
a = torch.tensor([1, 2, 3])
b = torch.tensor([4, 5, 6])

absum = torch.add(a, b)
print(absum)

# More pythonic!
print(a + b)

tensor([5, 7, 9])
tensor([5, 7, 9])


In [93]:
# division
div = torch.true_divide(a, b)
print(div)

# Divides each element by 2
div = torch.true_divide(a, 2)
print(div)

tensor([0.2500, 0.4000, 0.5000])
tensor([0.5000, 1.0000, 1.5000])


In [94]:
# Broadcasting | IMPORTANT!
a = torch.rand(size = (5, 5))
b = torch.rand(size = (1, 5))

print("a:", a)
print("b:", b)

print("B is brodcasted along the first axis.")
z = a - b
print(z)

a: tensor([[0.4984, 0.5983, 0.5536, 0.4711, 0.0625],
        [0.8188, 0.6412, 0.2827, 0.9892, 0.3091],
        [0.6976, 0.1063, 0.3415, 0.5130, 0.2362],
        [0.1838, 0.1359, 0.6560, 0.4277, 0.1255],
        [0.0595, 0.0527, 0.8090, 0.2089, 0.2726]])
b: tensor([[0.3735, 0.9051, 0.5762, 0.2842, 0.2377]])
B is brodcasted along the first axis.
tensor([[ 0.1250, -0.3068, -0.0226,  0.1869, -0.1751],
        [ 0.4453, -0.2640, -0.2934,  0.7050,  0.0714],
        [ 0.3242, -0.7988, -0.2347,  0.2288, -0.0015],
        [-0.1897, -0.7692,  0.0798,  0.1436, -0.1122],
        [-0.3140, -0.8524,  0.2329, -0.0752,  0.0349]])


In [95]:
a = torch.rand(size = (4, 5))
print("Shape of a:", a.shape)
print("Sum across the first dimension:", torch.sum(a, dim = 1))

Shape of a: torch.Size([4, 5])
Sum across the first dimension: tensor([2.9384, 3.3499, 1.9201, 2.6919])


In [96]:
print(a)
vals, indices = torch.sort(a)
print(vals)

tensor([[0.9641, 0.1176, 0.8918, 0.1839, 0.7810],
        [0.7747, 0.7300, 0.6658, 0.5261, 0.6532],
        [0.2188, 0.2446, 0.0414, 0.4322, 0.9830],
        [0.5028, 0.8465, 0.2300, 0.7743, 0.3383]])
tensor([[0.1176, 0.1839, 0.7810, 0.8918, 0.9641],
        [0.5261, 0.6532, 0.6658, 0.7300, 0.7747],
        [0.0414, 0.2188, 0.2446, 0.4322, 0.9830],
        [0.2300, 0.3383, 0.5028, 0.7743, 0.8465]])


In [97]:
# Values that are less then min will be set to min
# vals that are greater than max will be clamped to max
torch.clamp(a, min = 2, max = 10)

# We can create relu using clamp
def relu(x):
    return torch.clamp(x, min = 0)

In [98]:
a = torch.tensor([1, 0, 0, 1, 1, 1])

# check if any of the value is True
print(torch.any(a))

# check if all is True
print(torch.all(a))

tensor(True)
tensor(False)


In [106]:
# indexing
batch_size, features = 5, 512
data = torch.rand(size = (batch_size, features))

print("Shape:", data.shape)

# Retriving the first example data point in the batch
print(data[0, :].shape)

# Retriving the first feature of all examples
print(data[:, 0].shape) # we have 5 examples

# 3rd example, first 10 features
print(data[2, 0 : 10].shape)

Shape: torch.Size([5, 512])
torch.Size([512])
torch.Size([5])
torch.Size([10])


In [115]:
# Something more fancy!
x = torch.arange(10)
print(x)

# it will pick up the 3rd, 6th and 9th example in the batch
indices = [2, 5, 8]
print(x[indices])


# Picking up rows and columns based on indices
x = torch.rand(size = (3, 5))
print("Tensor:", x)

# pick up the element at first row and second column
# and element at second row and third column
rows, cols = [0, 1], [1, 2]
print(x[rows, cols])

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
tensor([2, 5, 8])
Tensor: tensor([[0.1901, 0.7005, 0.5319, 0.7154, 0.1544],
        [0.5504, 0.7524, 0.3400, 0.0268, 0.0097],
        [0.8642, 0.5110, 0.6382, 0.0090, 0.2280]])
tensor([0.7005, 0.3400])


In [122]:
x = torch.rand(size = (3, 5))
print(x)

# pick up all elements less than 0.50 or greater than 0.55
x[(x < 0.50) | (x > 0.55)]

# All even numbers
x = torch.arange(10)
print(x[x.remainder(2) == 0])

tensor([[0.1850, 0.2250, 0.0531, 0.7598, 0.8948],
        [0.1100, 0.1997, 0.2250, 0.6599, 0.0367],
        [0.4100, 0.9262, 0.0754, 0.2047, 0.3536]])
tensor([0, 2, 4, 6, 8])


In [127]:
# IMPORTANT!
condition = x >10
if_condition = x ** 2
else_condition = 0

x = torch.arange(6, 18)

print(torch.where(condition, if_condition, else_condition))

# All the unique values
x.unique()

tensor([  0,   0,   0,   0,   0, 121, 144, 169, 196, 225, 256, 289])


tensor([ 6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17])

In [134]:
x = torch.rand(size = (2, 2))

# how many dimensions do we have?
print(x.ndimension())

# Numbe of elements in x
print(x.numel())

2
4


In [136]:
# RESHAPING A TENSOR!
x = torch.arange(9)
print(x)

# Make it 3 x 3, using view
# Performs reshaping in place | Superior than view but memory should be contigious.
print(x.view(3, 3))

# using reshape
print(x.reshape(3, 3))

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8])
tensor([[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]])
tensor([[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]])


In [142]:
x = torch.empty((3, 3)).T
x.view(9)

RuntimeError: view size is not compatible with input tensor's size and stride (at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead.

In [147]:
# How to make anything contigious?
x.contiguous()


# Make it contigious first and then perform view!
x.contiguous().view(9)

tensor([1.6539e-09, 1.5917e-05, 0.0000e+00, 3.0885e-41, 0.0000e+00, 1.4257e+00,
        0.0000e+00, 4.7684e-06, 1.1210e-43])

In [156]:
# How to flatten?
x = torch.rand(1, 2, 5)
print(x)

# Flattening!
print(x.view(-1))

tensor([[[0.5831, 0.2233, 0.4322, 0.0102, 0.9558],
         [0.8117, 0.6465, 0.2381, 0.6944, 0.6036]]])
tensor([0.5831, 0.2233, 0.4322, 0.0102, 0.9558, 0.8117, 0.6465, 0.2381, 0.6944,
        0.6036])


In [158]:
# This can be pretty useful for implementing multihead attention!
# We can permute the dimension for the heads.
# Switch the axis!
x = torch.rand(64, 2, 5)

# we want to switch the axis!
x.permute(0, 2, 1).shape

torch.Size([64, 5, 2])

In [161]:
x = torch.arange(10)
print(x.unsqueeze(0).shape)
print(x.unsqueeze(1).shape)

torch.Size([1, 10])
torch.Size([10, 1])
