In [14]:
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
from tqdm import tqdm , tqdm_notebook, notebook
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence


In [2]:
# Reserved vocabulary indices. SOS_token is fed to the decoder as its first
# input; PAD_token / EOS_token are presumably padding and end-of-sentence
# markers (they are not referenced elsewhere in this notebook yet).
PAD_token, SOS_token, EOS_token = 0, 1, 2

# Number of sequences per mini-batch.
BATCH_SIZE = 50

In [3]:


class EncoderRNN(nn.Module):
    """Bidirectional GRU encoder over embedded token indices.

    Args:
        hidden_size: Width of the embedding and of each GRU direction.
        vocab_size: Number of rows in the embedding table.
        n_layers: Number of stacked GRU layers.
        dropout: Dropout applied between stacked GRU layers; forced to 0 for a
            single layer, where there is no "between layers" to apply it to.
    """

    def __init__(self,hidden_size,vocab_size, n_layers=1,dropout = 0):
        super(EncoderRNN, self).__init__()
        self.n_layers = n_layers
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(vocab_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers, dropout =(0 if n_layers ==1 else dropout), bidirectional= True)

    def forward(self, input_seq, input_length , hidden=None):
        """Encode a padded, time-major batch.

        Args:
            input_seq: Integer token indices shaped (seq_len, batch); the GRU is
                not constructed with ``batch_first``.
            input_length: Per-sequence lengths (list of ints or 1-D tensor),
                one entry per batch column, in any order.
            hidden: Optional initial GRU state shaped
                (2 * n_layers, batch, hidden_size).

        Returns:
            Tuple ``(outputs, hidden)``: ``outputs`` is
            (max_len, batch, hidden_size) with the forward and backward
            directions summed; ``hidden`` is the final GRU state shaped
            (2 * n_layers, batch, hidden_size).
        """
        embedded = self.embedding(input_seq)

        # pack_padded_sequence only accepts lengths that live on the CPU.
        if torch.is_tensor(input_length):
            input_length = input_length.cpu()

        # enforce_sorted=False lets callers pass batches in any order; the
        # default (True) raises unless lengths are sorted in descending order.
        packed = pack_padded_sequence(embedded, input_length, enforce_sorted=False) #for faster computation
        outputs, hidden = self.gru(packed, hidden)

        outputs , _ = pad_packed_sequence(outputs)
        # Bidirectional GRU: the last axis holds [forward | backward]; add the halves.
        outputs = outputs[:,:, :self.hidden_size] + outputs[:,:,self.hidden_size:]

        return outputs, hidden

    

In [4]:
class Attn(nn.Module):
    """Additive-style scoring layer: softmax(tanh(w1(encoder) + w2(decoder))).

    Args:
        hidden_size: Feature width of the decoder state and of the result.
        sequence_length: Number of encoder time steps; ``w1`` is sized for an
            input whose last dimension is ``sequence_length * hidden_size``.

    NOTE(review): because of how ``w1`` is sized, ``forward`` expects the
    encoder output flattened to (..., sequence_length * hidden_size) rather
    than the raw (seq_len, batch, hidden_size) tensor — confirm with callers.
    """

    def __init__(self, hidden_size,sequence_length):
        super(Attn,self).__init__()
        self.hidden_size = hidden_size
        self.w1 = nn.Linear(sequence_length* hidden_size, hidden_size)
        self.w2 = nn.Linear(hidden_size, hidden_size)

    def dot_score(self, hidden,encoder_output):
        """Return the element-wise product of the inputs summed over dim 2."""
        return torch.sum(hidden * encoder_output, dim =2 )

    def forward(self, encoder_output, decoder_state):
        """Score the encoder summary against the decoder state.

        Args:
            encoder_output: Tensor whose last dimension is
                ``sequence_length * hidden_size``.
            decoder_state: Tensor whose last dimension is ``hidden_size``.

        Returns:
            Weights with the broadcast shape of the two projections, normalised
            to sum to 1 along the last (feature) axis.
        """
        FC = self.w1(encoder_output) + self.w2(decoder_state)
        # torch.tanh replaces the deprecated F.tanh.
        tan = torch.tanh(FC)
        # Explicit dim: the implicit choice picks dim 0 for 3-D input, which for
        # a (n_layers, batch, hidden) state normalises across layers and
        # collapses to all ones when n_layers == 1.
        attention_weights = F.softmax(tan, dim=-1)
        return attention_weights

    




In [5]:
class DecoderRNN(nn.Module):
    """Single-step GRU decoder fed with [token embedding ; context vector].

    Args:
        hidden_size: Width of the embedding, the context vector and the GRU state.
        output_size: Number of output classes produced by the final linear layer.
        vocab_size: Number of rows in the embedding table.
        n_layers: Number of stacked GRU layers.
        dropout: Embedding dropout probability; also used between GRU layers
            when ``n_layers > 1``.
    """

    def __init__(self, hidden_size,output_size, vocab_size, n_layers = 1,dropout=0.1):
        super(DecoderRNN , self).__init__()
        self.hidden_size = hidden_size
        self.output_size= output_size
        self.n_layers = n_layers
        self.dropout = dropout   #vocab_size == output_size
        self.vocab_size = vocab_size

        self.embedding = nn.Embedding(vocab_size, hidden_size)
        self.embedding_dropout = nn.Dropout(dropout)
        # Input is the embedding concatenated with the context, hence 2 * hidden_size.
        self.gru = nn.GRU(2*hidden_size, hidden_size, n_layers, dropout = (0 if n_layers==1 else dropout))

        self.out = nn.Linear(hidden_size, output_size)

    def forward(self, input_step, context,last_hidden):
        """Decode one time step.

        Args:
            input_step: Integer token indices shaped (1, batch).
            context: Context vector shaped (1, batch, hidden_size).
            last_hidden: Previous GRU state shaped (n_layers, batch, hidden_size).

        Returns:
            Tuple ``(output, hidden)``: ``output`` is (batch, output_size) with
            softmax applied over the class axis; ``hidden`` is the new GRU state.

        NOTE(review): the returned values are probabilities, while the
        nn.CrossEntropyLoss used later in this notebook applies log-softmax
        itself and expects raw scores — confirm which one is intended.
        """
        embedded = self.embedding(input_step)
        embedded = self.embedding_dropout(embedded)
        # torch.cat takes a sequence of tensors plus a dim; join on the feature axis.
        x = torch.cat((embedded, context), dim=-1)

        rnn_output, hidden = self.gru(x, last_hidden)
        rnn_output = rnn_output.squeeze(0)  # drop the length-1 time axis
        output = self.out(rnn_output)
        output = F.softmax(output, dim =1 )
        return output, hidden
        
        
        


        


In [10]:
class MainModel(nn.Module):
    """Encoder -> attention -> decoder pipeline with greedy, fixed-length decoding.

    Args:
        hidden_size: Feature width shared by encoder, attention and decoder.
        vocab_size: Vocabulary size (used for both input and output).
        sequence_length: Number of input time steps; also the number of
            decoding steps performed by ``forward``.
        n_layers: Number of stacked GRU layers in encoder and decoder.
        dropout: Dropout probability forwarded to encoder and decoder.
    """

    def __init__(self,hidden_size,vocab_size,sequence_length, n_layers=1,dropout = 0.0):
        super(MainModel, self).__init__()
        self.input_length = sequence_length
        # Kept on the instance because init_hidden() and forward() need them.
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.Encode = EncoderRNN(hidden_size, vocab_size, n_layers,dropout )
        self.Attention = Attn(hidden_size, sequence_length)
        self.Decode = DecoderRNN(hidden_size,vocab_size, vocab_size, n_layers, dropout)

    def init_hidden(self, batch_size=1, device=None):
        """Return a random initial state for the bidirectional encoder.

        Args:
            batch_size: Number of sequences in the batch.
            device: Device to allocate on (``None`` = PyTorch default).

        Returns:
            Tensor shaped (2 * n_layers, batch_size, hidden_size); the factor 2
            accounts for the encoder's two directions.
        """
        return torch.randn(2 * self.n_layers, batch_size, self.hidden_size, device=device)

    def forward(self, input_seq ):
        """Encode ``input_seq`` and greedily decode ``sequence_length`` steps.

        Args:
            input_seq: Integer token indices shaped (sequence_length, batch).

        Returns:
            List with one (batch, vocab_size) tensor per decoding step, as
            produced by the decoder.

        Raises:
            ValueError: If ``input_seq`` is not 2-D or its first dimension
                differs from the configured sequence length (the attention
                layer is sized for exactly that many steps).
        """
        if input_seq.dim() != 2 or input_seq.size(0) != self.input_length:
            raise ValueError(
                f"input_seq must be shaped ({self.input_length}, batch), got {tuple(input_seq.shape)}"
            )
        batch_size = input_seq.size(1)
        device = input_seq.device

        hidden = self.init_hidden(batch_size, device)
        # Every column is treated as a full-length sequence.
        lengths = [self.input_length] * batch_size
        encoder_output, encoder_hidden = self.Encode(input_seq, lengths, hidden)

        # Attn.w1 expects the encoder output flattened per batch element:
        # (seq, batch, hidden) -> (batch, seq * hidden).
        flat_encoder = encoder_output.permute(1, 0, 2).reshape(batch_size, -1)

        # Fold the two encoder directions together (mirroring how the encoder
        # sums its output halves) so the state fits the unidirectional decoder.
        decoder_hidden = encoder_hidden.reshape(
            self.n_layers, 2, batch_size, self.hidden_size
        ).sum(dim=1)

        # Embedding lookups need integer indices, one SOS per batch element.
        input_step = torch.full((1, batch_size), SOS_token, dtype=torch.long, device=device)

        output = []
        for _ in range(self.input_length):
            attn_weights = self.Attention(flat_encoder, decoder_hidden[-1])  # (batch, hidden)
            # Weighted sum over time only, keeping (1, batch, hidden) for the decoder.
            context = torch.sum(attn_weights * encoder_output, dim=0, keepdim=True)

            step_probs, decoder_hidden = self.Decode(input_step, context, decoder_hidden)
            output.append(step_probs)

            # Greedy decoding: feed the most likely token back in.
            input_step = step_probs.argmax(dim=1).unsqueeze(0)

        return output

        

            
            
            




        

        
        

In [11]:
# Use cross-entropy loss.
# The model returns a tensor produced by a softmax operation; the index with the maximum value is the predicted answer.
# That index is the key into a dictionary that maps numbers to words (e.g. 1 -> "hi", 2 -> "bye").
# Build both an index2string and a string2index dictionary.
# MainModel arguments: hidden_size, vocab_size, sequence_length, n_layers=1, dropout=0

In [16]:
# Hyperparameters, listed in the positional order MainModel expects.
hidden_size, vocab_size, sequence_length = 500, 2000, 20
n_layers, dropout = 2, 0.5

model = MainModel(hidden_size, vocab_size, sequence_length, n_layers, dropout)

criterion = nn.CrossEntropyLoss()
# Only parameters that require gradients are handed to Adam.
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.001)

# Placeholder random tensor; to be replaced with a real data loader later.
train_loader = torch.randn(10, 5, 20)

In [18]:
# Prefer the GPU when one is available.
device = torch.device('cuda' if torch.cuda.is_available() else  'cpu')
# train() moves every batch to `device`, so the model's parameters must live
# there as well; otherwise the first forward pass fails on a CUDA machine.
model.to(device)
model.train()
# Mean training loss per epoch, appended to by train().
train_losses=[]
def train():
    """Train the global ``model`` for 10 epochs over ``train_loader``.

    Relies on the module-level ``model``, ``criterion``, ``optimizer``,
    ``device`` and ``train_loader``. Gradients are clipped to a norm of 3
    before each optimizer step, and the mean loss of every epoch is appended
    to the module-level ``train_losses`` list.
    """
    for epoch in range (10):
        progress_bar = notebook.tqdm(train_loader,leave= False )

        losses = []
        for inputs, target in progress_bar:
            inputs, target = inputs.to(device) , target.to(device)
            model.zero_grad()
            # Pass the batch, not the builtin `input` function.
            output = model(inputs)
            if isinstance(output, (list, tuple)):
                # The model returns one (batch, vocab) tensor per decoding step;
                # the criterion needs a single (N, vocab) tensor.
                output = torch.stack(output)
                output = output.reshape(-1, output.size(-1))
                # assumes target holds one class index per (step, batch) entry,
                # in the same order as the stacked output — TODO confirm
                target = target.reshape(-1)
            loss = criterion(output, target)
            loss.backward()

            nn.utils.clip_grad_norm_(model.parameters(),3)

            optimizer.step()

            progress_bar.set_description(f'Loss: {loss.item():.3f}')

            losses.append(loss.item())

        if not losses:
            # An empty loader would otherwise divide by zero below.
            tqdm.write(f'Epoch #{epoch+1} \t no batches, skipped')
            continue

        epoch_loss = sum(losses)/len(losses)
        train_losses.append(epoch_loss)

        # `.3f` (three decimals) to match the progress-bar format; the previous
        # `3f` was a minimum-width specifier, not a precision.
        tqdm.write(f'Epoch #{epoch+1} \t Train Loss: {epoch_loss:.3f}')





            

            

    

In [4]:
# file read path

import os
import codecs

def read_in(folder):
    """Read ``dialogues_train.txt`` from ``folder``.

    Args:
        folder: Directory to search, with or without a trailing separator.

    Returns:
        A list holding the file's full text (decoded as ISO-8859-1, undecodable
        bytes ignored) if the file is present, otherwise an empty list.

    Raises:
        OSError: If ``folder`` cannot be listed or the file cannot be opened.
    """
    target_name = "dialogues_train.txt"
    a_list = []
    for a_file in os.listdir(folder):
        if a_file != target_name:
            continue
        # os.path.join works whether or not `folder` ends with a separator.
        path = os.path.join(folder, a_file)
        # The context manager closes the handle even if read() raises.
        with codecs.open(path, "r", encoding = "ISO-8859-1", errors = "ignore") as f:
            a_list.append(f.read())
    return a_list

In [None]:
# Directory holding dialogues_train.txt. Set the DIALOGUE_DATA_DIR environment
# variable to point elsewhere; the default is the original local path.
DATA_DIR = os.environ.get("DIALOGUE_DATA_DIR", "/Users/Madara/Desktop/train/")

test_list = read_in(DATA_DIR)
print(len(test_list))
if not test_list:
    # Fail with an explicit message instead of an IndexError on test_list[0].
    raise FileNotFoundError(f"dialogues_train.txt not found in {DATA_DIR!r}")
# Show a short preview rather than dumping the whole file into the cell output.
print(test_list[0][:500])

In [6]:
#text splitting
# Split the raw corpus on the " __eou__ " marker (presumably "end of utterance").
# str.split already returns a list, so no comprehension is needed.
new_test_list = test_list[0].split(" __eou__ ")

# Slice instead of indexing 0..4 so a corpus with fewer than five entries
# prints what it has rather than raising IndexError.
for utterance in new_test_list[:5]:
    print(utterance)



Say , Jim , how about going for a few beers after dinner ?
You know that is tempting but is really not good for our fitness .
What do you mean ? It will help us to relax .
Do you really think so ? I don't . It will just make us fat and act silly . Remember last time ?
I guess you are right.But what shall we do ? I don't feel like sitting at home .


In [7]:
#extract features
import nltk
from nltk import word_tokenize
# word_tokenize needs the Punkt sentence models. Older NLTK releases load the
# 'punkt' resource, while newer ones (3.8.2 and later) load 'punkt_tab'
# instead, so fetch both; download() reports a failure without raising.
for resource in ('punkt', 'punkt_tab'):
    nltk.download(resource)



[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Madara\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [9]:

# Lower-cased, purely alphabetic tokens from every utterance, in corpus order.
word_list = [
    token
    for utterance in new_test_list
    for token in word_tokenize(utterance.lower())
    if token.isalpha()
]

# Map each distinct word to a 1-based index in order of first appearance.
# NOTE(review): indices 1 and 2 are also the values of SOS_token and
# EOS_token defined earlier in this notebook — confirm whether the
# vocabulary should start after the reserved tokens.
features = {}
for word in word_list:
    # setdefault leaves the index of an already-seen word untouched.
    features.setdefault(word, len(features) + 1)

# Highest index handed out so far (equals the number of distinct words).
word_index = len(features)

print(features)



