In [2]:
!pip3 install torch

Collecting torch
  Downloading torch-2.1.2-cp310-none-macosx_11_0_arm64.whl (59.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.6/59.6 MB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting filelock
  Using cached filelock-3.13.1-py3-none-any.whl (11 kB)
Collecting sympy
  Downloading sympy-1.12-py3-none-any.whl (5.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.7/5.7 MB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting fsspec
  Downloading fsspec-2023.12.2-py3-none-any.whl (168 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m169.0/169.0 kB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m
Collecting mpmath>=0.19
  Downloading mpmath-1.3.0-py3-none-any.whl (536 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m536.2/536.2 kB[0m [31m14.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: mpmath, sympy, fsspec, filelock, torch
Successfully insta

In [26]:
import torch
import numpy as np

### Creating Vocabulary

In [27]:
SOS_token = 0 #start of sentence token
EOS_token = 1 #end of sentence token

#index to word mapping
index2words = {
    SOS_token:'SOS',  #vocab dict
    EOS_token:'EOS'

}

words  = "How are you doing ? I am good, thank you and you ?"
word_list = set(words.lower().split(" ")) #Get unique words from the above sentence


for word in word_list:
    index2words[len(index2words)] = word #because teh index of the latest word added will be the last position i.e. the now length of the vocab dict


index2words

{0: 'SOS',
 1: 'EOS',
 2: 'you',
 3: 'thank',
 4: '?',
 5: 'and',
 6: 'how',
 7: 'good,',
 8: 'i',
 9: 'am',
 10: 'doing',
 11: 'are'}

In [28]:
#word to index mapping

words2index = {word : key for key,word in index2words.items()}

words2index

{'SOS': 0,
 'EOS': 1,
 'you': 2,
 'thank': 3,
 '?': 4,
 'and': 5,
 'how': 6,
 'good,': 7,
 'i': 8,
 'am': 9,
 'doing': 10,
 'are': 11}

In [29]:
#to create torch tensors

def convert2tensors(sentence):
    word_list = sentence.lower().split(" ")
    sentence_index = [words2index[word] for word in word_list]

    sentence_tensor = torch.tensor(sentence_index,dtype = torch.long).view(1,-1) #The `-1` in the second dimension means that PyTorch should automatically infer the size of this dimension based on the original size of the tensor. The idea is that PyTorch will calculate the size such that the total number of elements in the tensor remains the same.
    return sentence_tensor
    

sent = "How are you doing you ?"
print(convert2tensors(sent))

tensor([[ 6, 11,  2, 10,  2,  4]])


## The Encoder

In [30]:
import torch.nn as nn

class EncoderRNN(nn.Module):
    def __init__(self,input_size,hidden_size): 
        """

        Args:
            input_size (_type_): size of the vocabulary dict
            hidden_size (_type_): size of the vectors of the word embeddings/size of the vectors going into and coming out of the recurring units 
                                e.g. if an input vector or vocab word is represented as a vector of size(1,5) then the hidden size will also be (1,5)
                                but the input size will be 12 if there are 12 words inthe vocabulary
        """

        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size,hidden_size) #input_size = num of rows that the embedding wil have,hidden_size = no of elements that will be in each vector coming out of the embedding
        self.gru = nn.GRU(hidden_size,hidden_size,batch_first=True) #this GRU will be able to take vector of the size of the vector coming out of the embedding and will generate a vector of the same size
        #batch_first=True -> the tensor that the GRU is gonna generate will have as a first dimension the batch

    def forward(self,input_tensor):
        embedded = self.embedding(input_tensor)
        output,hidden = self.gru(embedded) # we here only have one gru layer we can have as many as we like
        return output, hidden


In [31]:
#Encoding a sentence

encoder = EncoderRNN(len(words2index),hidden_size=3)

sentence = "How are you doing ?"
sent_tensor = convert2tensors(sentence)

output,hidden = encoder(sent_tensor)

print(f"output_size: {output.size()} \n output: {output}") # 1 :batch size - coz only 1 sent,5: no of words in the sentence, 3:hidden size"
print(f"hidden_size: {hidden.size()} \n hidden: {hidden}") #1: batch ,1: no of layers,3: hidden_size

output_size: torch.Size([1, 5, 3]) 
 output: tensor([[[ 0.0650,  0.0368, -0.3729],
         [ 0.5080, -0.2216, -0.2133],
         [ 0.4138, -0.2536, -0.5126],
         [ 0.6746, -0.4543, -0.2361],
         [ 0.3519, -0.2177, -0.3059]]], grad_fn=<TransposeBackward1>)
hidden_size: torch.Size([1, 1, 3]) 
 hidden: tensor([[[ 0.3519, -0.2177, -0.3059]]], grad_fn=<StackBackward0>)


In [32]:
print(f"output_size: {output.size()} \n output last element: {output[:,-1]}")
print(f"hidden_size: {hidden.size()} \n hidden: {hidden}")

#here both last element of output and the hidden is same co zwe only have 1 gru layer and its not bidirectional

output_size: torch.Size([1, 5, 3]) 
 output last element: tensor([[ 0.3519, -0.2177, -0.3059]], grad_fn=<SelectBackward0>)
hidden_size: torch.Size([1, 1, 3]) 
 hidden: tensor([[[ 0.3519, -0.2177, -0.3059]]], grad_fn=<StackBackward0>)


## The Decoder

In [81]:
import torch.nn.functional as F

MAX_LENGTH = 10

class DecoderRNN(nn.Module):
    def __init__(self,hidden_size,output_size):
        """_summary_

        Args:
            hidden_size (_type_): num 
            output_size (_type_): should be same as the size of the vocab list
        """
        super(DecoderRNN,self).__init__()   #gonna initialize the nn.Module class
        self.embedding = nn.Embedding(output_size,hidden_size) #diff embedding than that used in encoder, going to have as many rows as the output size
        #embedding is going to generate a vector of size hidden size

        self.gru = nn.GRU(hidden_size,hidden_size,batch_first=True)
        self.out = nn.Linear(hidden_size,output_size)  #prediction head with linear layer to output the predictions



    def forward(self,encoder_outputs,encoder_hidden):
        batch_size = encoder_outputs.size(0) #this is the batch size of the encoder output
        #we are goin to usde this batch size to generate the input to the deocder to  initialize teh process
        decoder_input = torch.empty(batch_size,1,dtype= torch.long).fill_(SOS_token) #1 because we only need 1 input, batch_size because we need input for each of the element in the batch
        #we fill it with the start of sentence token
        decoder_hidden = encoder_hidden  #in the first step the hidden states are going to be inputted into teh GRu ar egoing to be the ones comin gout of encoder
        decoder_outputs = []
        
        for i in range(MAX_LENGTH):
            decoder_output,decoder_hidden = self.forward_step(decoder_input,decoder_hidden)
            decoder_outputs.append(decoder_output)

            _,topIdx = decoder_output.topk(1)  #gives us the index of the word that has the max value in the decoder output ~ equivalent to argmax
            decoder_input = topIdx.squeeze(-1).detach()  #squeee removes all the dimensions hthat have dimensionality 1, and we detach it to not confuse the gradient

        decoder_outputs= torch.cat(decoder_outputs, dim = 1)  #conactenate decoder outputs to make a pytorch tensor
        decoder_outputs = F.log_softmax(decoder_outputs,dim =-1) #apply the log_softmax on deocdrr outputs

        return decoder_outputs,decoder_hidden



    def forward_step(self,input_tensor,hidden):  #we call it for each element in the output sequence, generates only one element in the output sequence
        """
        We will iterate this forward step as many times as there are words in the input sentence
        """
        output = self.embedding(input_tensor)
        output = F.relu(output) #to add some non linearity to this NN
        output,hidden = self.gru(output, hidden)  #hidden is the hidden states generated by the encoder
        output = self.out(output)  # using the output from teh gru, we ar going to project it using the linear layer(i.e. self.out form init), to generate an output with a diff dimention


        return output,hidden


In [36]:
BATCH_SIZE = 32
HIDDEN_SIZE = 10
VOCAB_SIZE = 1000
OUTPUT_VECT_NUM = 10

input_tensor = torch.rand(BATCH_SIZE,OUTPUT_VECT_NUM,HIDDEN_SIZE)  #hidden size= size of the tensor coming out of the decoder
input_tensor.size()

torch.Size([32, 10, 10])

In [56]:
linear_layer = nn.Linear(HIDDEN_SIZE,VOCAB_SIZE) #here what we were able to do is project the output from decoder

out = linear_layer(input_tensor)
out.size()

torch.Size([32, 10, 1000])

In [57]:
_,indexes = out.topk(1)
indexes.size()

torch.Size([32, 10, 1])

In [66]:
#another random vector

out_ = torch.randn(3,4,5)  # has 5 vectors of size 4 and a total of 3 batches
_,indexes_ = out_.topk(1)
# indexes.size()
# topk : gives out the top value index in the output for all inputs

# indexes
indexes_.squeeze() #just removes the dimensions that are 1

tensor([[3, 1, 3, 1],
        [3, 0, 0, 0],
        [2, 2, 2, 2]])

In [62]:
# out

In [61]:

F.softmax(out,dim= -1) #applying softmax on out, dim =-1 normalize on last dimension
# .sum(-1) if we do this we get a tensor of all 1s
#i.e. teh softmax worked since the numbers ar enow in range 0-1
# and in the last dimension sum to 1 like we wanted

tensor([[[0.0006, 0.0010, 0.0015,  ..., 0.0009, 0.0012, 0.0005],
         [0.0005, 0.0011, 0.0011,  ..., 0.0010, 0.0008, 0.0006],
         [0.0006, 0.0011, 0.0018,  ..., 0.0007, 0.0007, 0.0006],
         ...,
         [0.0006, 0.0011, 0.0013,  ..., 0.0010, 0.0010, 0.0008],
         [0.0006, 0.0010, 0.0016,  ..., 0.0009, 0.0010, 0.0006],
         [0.0007, 0.0010, 0.0014,  ..., 0.0011, 0.0008, 0.0006]],

        [[0.0007, 0.0009, 0.0013,  ..., 0.0008, 0.0009, 0.0005],
         [0.0005, 0.0010, 0.0013,  ..., 0.0009, 0.0008, 0.0006],
         [0.0008, 0.0008, 0.0014,  ..., 0.0010, 0.0011, 0.0005],
         ...,
         [0.0008, 0.0009, 0.0014,  ..., 0.0010, 0.0008, 0.0006],
         [0.0005, 0.0012, 0.0013,  ..., 0.0007, 0.0010, 0.0007],
         [0.0006, 0.0011, 0.0012,  ..., 0.0008, 0.0008, 0.0006]],

        [[0.0007, 0.0009, 0.0010,  ..., 0.0007, 0.0006, 0.0005],
         [0.0006, 0.0010, 0.0016,  ..., 0.0008, 0.0009, 0.0005],
         [0.0005, 0.0014, 0.0014,  ..., 0.0009, 0.0010, 0.

In [68]:
out_

tensor([[[ 0.9193, -1.0916, -0.8731,  1.6631, -0.0410],
         [-0.6673,  0.3435,  0.1451,  0.2436,  0.1969],
         [-0.3215, -0.0290, -0.5229,  0.7086, -0.2852],
         [ 0.5290,  1.4926, -0.9996, -0.3783, -1.1827]],

        [[ 0.5104,  0.8643, -1.3237,  1.5819,  0.2093],
         [ 0.5146, -0.2411, -0.2051, -1.0754,  0.2297],
         [ 1.7449, -0.4838,  0.9930, -1.0516,  0.4883],
         [ 1.4940,  0.6859, -0.3067,  0.9819, -0.2958]],

        [[-0.1065, -0.3531,  1.2323,  0.9363, -0.3118],
         [-1.6017, -0.0606,  0.4528, -0.0395,  0.2500],
         [-1.5264, -0.8603,  1.6205,  0.8130,  0.6255],
         [ 0.1485, -0.6808,  0.3740,  0.0938,  0.1565]]])

In [70]:
F.log_softmax(out_,dim = -1)

#we usually use log for classification problems

tensor([[[-1.3317, -3.3425, -3.1240, -0.5878, -2.2919],
         [-2.3851, -1.3743, -1.5727, -1.4743, -1.5210],
         [-1.9453, -1.6528, -2.1467, -0.9152, -1.9090],
         [-1.4866, -0.5230, -3.0152, -2.3939, -3.1983]],

        [[-1.8316, -1.4778, -3.6658, -0.7601, -2.1327],
         [-1.0691, -1.8247, -1.7887, -2.6590, -1.3540],
         [-0.6548, -2.8835, -1.4067, -3.4513, -1.9114],
         [-0.8659, -1.6740, -2.6666, -1.3780, -2.6557]],

        [[-2.2243, -2.4709, -0.8856, -1.1815, -2.4296],
         [-3.2033, -1.6621, -1.1488, -1.6411, -1.3516],
         [-3.8108, -3.1447, -0.6639, -1.4714, -1.6589],
         [-1.5356, -2.3649, -1.3101, -1.5903, -1.5276]]])

## Putting the Encoder - Decoder together

In [82]:
decoder = DecoderRNN(hidden_size = 3,output_size=len(words2index))
encoder = EncoderRNN(len(words2index),hidden_size=3)

sentence = "How are you doing ?"

input_tensor =convert2tensors(sentence)
output, hidden = encoder(input_tensor)

decoder_outputs , decoder_hiddens = decoder(output,hidden)
decoder_outputs


tensor([[ 6, 11,  2, 10,  4]])

In [86]:
#fucntion that takes a tensor and generates a sentence

def convert2sentence(tensor):
    words_list = [index2words[idx.item()] for idx in tensor] #idx.item will get the python value at that index in the tensor
    return " ".join(words_list)

_,topIdx = decoder_outputs.topk(1)
decoded_ids = topIdx.squeeze()
decoded_ids

tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [87]:
convert2sentence(decoded_ids)  #this output is bad because we didnt train our model

'you you you you you you you you you you'