In [2]:
import torch
import torch.nn as nn
from torch.nn import functional as F
import numpy as np
import time
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cpu


In [3]:
%%time
start_time = time.time()
# matrix operations here
zeros = torch.zeros(1, 1)
end_time = time.time()

elapsed_time = end_time - start_time
print(f"{elapsed_time:.8f}")

# wall time : how long it actually takes in real time, how long should we wait.

0.02036285
CPU times: total: 15.6 ms
Wall time: 20.4 ms


## Forward pass and back propogation through the network

## Initialising tensors

In [5]:
torch_rand1 = torch.rand(100, 100, 100, 100).to(device)
torch_rand2 = torch.rand(100, 100, 100, 100).to(device)
np_rand1 = torch.rand(100, 100, 100, 100)
np_rand2 = torch.rand(100, 100, 100, 100)

start_time = time.time()

rand = (torch_rand1 @ torch_rand2)

end_time = time.time()

elapsed_time = end_time - start_time
print(f"{elapsed_time:.8f}")  #for gpu


start_time = time.time()

rand = np.multiply(np_rand1, np_rand2)
end_time = time.time()
elapsed_time = end_time - start_time
print(f"{elapsed_time:.8f}")  #for cpu

0.40020967
0.16988897


## torch.multinomial - to predict what word is going to come next

In [7]:
# Define a probability tensor
probabilities = torch.tensor([0.1, 0.9]) # 10 % prob that 0 is going to come next, 90% prob that 1 is going to come next
# 10% or 0.1 => 0, 90% or 0.9 => 1. each probability points to the index of the probability in the tensor
# Draw 5 samples from the multinomial distribution
samples = torch.multinomial(probabilities, num_samples=10, replacement=True)
print(samples)

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])


## torch.cat - prob distribution will be used to pick the first one, and then based on the first one we are going to predict
## the next character, once we have predicted that we are going to concatenate the new one with the ones we have already predicted.
## By the end we will have all of the integers predicted

In [8]:
tensor = torch.tensor([1, 2, 3, 4])
out = torch.cat((tensor, torch.tensor([5])), dim=0)
out

tensor([1, 2, 3, 4, 5])

## torch.tril - predicting based on history of knowledge -basically, we can't predict while knowing what the answer is 


In [9]:
out = torch.tril(torch.ones(5, 5))
out

tensor([[1., 0., 0., 0., 0.],
        [1., 1., 0., 0., 0.],
        [1., 1., 1., 0., 0.],
        [1., 1., 1., 1., 0.],
        [1., 1., 1., 1., 1.]])

In [None]:
# we predicted 1 and rest of them aren't predicted yet - 1st row
# we predicted one more and rest of them aren't predicted yet - 2nd row
## goes on ...

In [10]:
out = torch.triu(torch.ones(5, 5))
out

tensor([[1., 1., 1., 1., 1.],
        [0., 1., 1., 1., 1.],
        [0., 0., 1., 1., 1.],
        [0., 0., 0., 1., 1.],
        [0., 0., 0., 0., 1.]])

In [11]:
# Masked fill - by exponentiating - 2.71^m
out = torch.zeros(5, 5).masked_fill(torch.tril(torch.ones(5, 5)) == 0, float('-inf'))
out
# 2.71^0 = 1, 2.71^-inf = 0, ..

tensor([[0., -inf, -inf, -inf, -inf],
        [0., 0., -inf, -inf, -inf],
        [0., 0., 0., -inf, -inf],
        [0., 0., 0., 0., -inf],
        [0., 0., 0., 0., 0.]])

In [12]:
## verify masked fill
torch.exp(out)

tensor([[1., 0., 0., 0., 0.],
        [1., 1., 0., 0., 0.],
        [1., 1., 1., 0., 0.],
        [1., 1., 1., 1., 0.],
        [1., 1., 1., 1., 1.]])

## torch.stack - stacking a bunch of one-dimensional(length of integers or tokens) on top of each other - to make a 2-dimensional or
## bunch of blocks to make a batch

In [7]:
tensor1 = torch.tensor([1, 2, 3])
tensor2 = torch.tensor([4, 5, 6])
tensor3 = torch.tensor([7, 8, 9])

# Stack the tensors along a new dimension
stacked_tensor = torch.stack([tensor1, tensor2, tensor3])
stacked_tensor

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [6]:
input = torch.zeros(2, 3, 4)
out1 = input.transpose(0, 1)
out2 = input.transpose(-2,-1)
print(out1.shape)
print(out2.shape)
# torch.permute works the same but you provide the new order of dimensions instead of the dimensions you'd like to swap.

torch.Size([3, 2, 4])
torch.Size([2, 4, 3])


## torch.nn - linear function - contains anything that has learnable parameters, highly used in our model
## when we apply weight and bias under nn.module it will learn those and become better and better
## and it will train based on how accurate those are
## and how close certain parameters bring it to the desired output.

#### make sure input layers aligns with the hidden layers
#### and hidden layers aligns with the consecutive hidden layers 
#### and the last hidden layer alligns with the following output layer

#### lets suppose there are 2 , 4, 3, 1 neurons in 4 seperate coloumns
#### transformations apply in this way {2,4} {4,3} (3,1}

In [9]:
import torch.nn as nn
sample = torch.tensor([10.,10.,10.])
linear = nn.Linear(3, 3, bias=False)
print(linear(sample))

tensor([ 1.9531, -3.5460, -5.2060], grad_fn=<SqueezeBackward4>)


## softmax function - first exponentiate , then add the total and then div each element by the exponentiated total
## [1,2,3] -> 2.71^1 2.71^2 2.71^3 => (2.71+7.38+20.08)  => 30.17 so 2.71/30.17 = x, 7.34/30.17 = y, 19.9/30.17= z
## [1,2,3] -> [x,y,z] -> [0.089,0.243,0.659]

import torch.nn.functional as F

# Create a tensor
tensor1 = torch.tensor([1.0, 2.0, 3.0])

# Apply softmax using torch.nn.functional.softmax()
softmax_output = F.softmax(tensor1, dim=0)

print(softmax_output)

# Embedding Vectors

## nn.Embedding -> they will store vector of information of a character or a word
## for ex : 'a' : character, [0.2, 0.1, 0.5, 0.3, 0.9 ] - embedding vector
## contains vocab size -> how many unique characters are actually in our dataset

## nn.embedding -> is a vector or numerical representation of the sentiment of a letter

In [5]:
# Initialize an embedding layer
vocab_size = 80
embedding_dim = 6
embedding = nn.Embedding(vocab_size, embedding_dim)

# Create some input indices
input_indices = torch.LongTensor([1, 5, 3, 2])

# Apply the embedding layer
embedded_output = embedding(input_indices)

# The output will be a tensor of shape (4, 100), where 4 is the number of inputs
# and 6 is the dimensionality of the embedding vectors
print(embedded_output.shape)
print(embedded_output)

torch.Size([4, 6])
tensor([[ 1.2622,  0.2729, -1.2555, -0.5327, -0.3371, -2.0049],
        [-1.7446,  0.7308,  0.4424, -2.6829,  3.0430, -0.7249],
        [ 0.2020, -0.0076, -0.3018,  2.0341, -1.7975,  0.4426],
        [-2.3298,  0.8533, -0.9933,  0.6968,  1.0662,  2.4717]],
       grad_fn=<EmbeddingBackward0>)


# Matrix multiplication - for multiplying weights in neural networks- make sure they are floating point numbers
### a @ b - mul 2 matrices - in pytorch

In [6]:
a = torch.tensor([[1,2],[3,4],[5,6]])
b = torch.tensor([[7,8,9],[10,11,12]])
# print(a @ b)
print(torch.matmul(a, b))

tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])


In [7]:
int_64 = torch.randint(1, (3, 2)).float()
#type int64
float_32 = torch.rand(2,3)
#type float32
# print(int_64.dtype, float_32.dtype)
result = torch.matmul(int_64, float_32)
print(result)

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])


In [10]:
a = torch.rand(2, 3, 5)
print(a.shape)
x, y, z = a.shape
a = a.view(x,y,z) #view allows us to unpack with the dot shape
#and then we can use a view to put them back together into a tensor
# print(x, y, z)
print(a.shape)


torch.Size([2, 3, 5])
torch.Size([2, 3, 5])


In [None]:
input = torch.rand((4, 8, 10))
B, T, C = input.shape
output = input.view(B*T, C)
print(output)
# print(input)
print(output[:, -1, :])