In [None]:
#Preprocess

# Pre

In [14]:
# For tips on running notebooks in Google Colab, see
# https://pytorch.org/tutorials/beginner/colab
%matplotlib inline

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import torch
from torch.jit import script, trace
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import csv
import random
import re
import os
import unicodedata
import codecs
from io import open
import itertools
import math
import json


USE_CUDA = torch.cuda.is_available()
device = torch.device("cuda" if USE_CUDA else "cpu")


from google.colab import drive
drive.mount('/content/gdrive')

corpus_name = "cornell"
corpus = os.path.join("/content/gdrive/My Drive/data", corpus_name)

def printLines(file, n=10):
    with open(file, 'rb') as datafile:
        lines = datafile.readlines()
    for line in lines[:n]:
        print(line)

printLines(os.path.join(corpus, "utterances.jsonl"))

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
b'{"id": "L1045", "conversation_id": "L1044", "text": "They do not!", "speaker": "u0", "meta": {"movie_id": "m0", "parsed": [{"rt": 1, "toks": [{"tok": "They", "tag": "PRP", "dep": "nsubj", "up": 1, "dn": []}, {"tok": "do", "tag": "VBP", "dep": "ROOT", "dn": [0, 2, 3]}, {"tok": "not", "tag": "RB", "dep": "neg", "up": 1, "dn": []}, {"tok": "!", "tag": ".", "dep": "punct", "up": 1, "dn": []}]}]}, "reply-to": "L1044", "timestamp": null, "vectors": []}\n'
b'{"id": "L1044", "conversation_id": "L1044", "text": "They do to!", "speaker": "u2", "meta": {"movie_id": "m0", "parsed": [{"rt": 1, "toks": [{"tok": "They", "tag": "PRP", "dep": "nsubj", "up": 1, "dn": []}, {"tok": "do", "tag": "VBP", "dep": "ROOT", "dn": [0, 2, 3]}, {"tok": "to", "tag": "TO", "dep": "dobj", "up": 1, "dn": []}, {"tok": "!", "tag": ".", "dep": "punct", "up": 1, "dn": []}]}]}, "reply-to": null

In [67]:
#setup the data, load dataset from the utterances.jsonl and create a python data, 
# pytorch vocabulary setup courtesy Pytorch
def loadlines(fileName):
    lines = {}
    conversations = {}
    with open(fileName, 'r', encoding='iso-8859-1') as f:
        for line in f:
            lineJson = json.loads(line)
            #get the field object for line
            lineObj = {}
            lineObj["lineID"] = lineJson["id"]
            lineObj["characterID"] = lineJson["speaker"]
            lineObj["text"] = lineJson["text"]
            lines[lineObj['lineID']] = lineObj

             #get the field object for convos
            if lineJson["conversation_id"] not in conversations:
                convObj = {}
                convObj["conversationID"] = lineJson["conversation_id"]
                convObj["movieID"] = lineJson["meta"]["movie_id"]
                convObj["lines"] = [lineObj]
            else:
                convObj = conversations[lineJson["conversation_id"]]
                convObj["lines"].insert(0, lineObj)
            conversations[convObj["conversationID"]] = convObj

    return lines, conversations


# Extracts pairs of sentences from conversations
def extractSentencePairs(conversations):
    qa_pairs = []
    for conversation in conversations.values():
        # Iterate over all the lines of the conversation
        for i in range(len(conversation["lines"]) - 1): 
            inputLine = conversation["lines"][i]["text"].strip()
            targetLine = conversation["lines"][i+1]["text"].strip()
            # only select lines if both input and output present
            if inputLine and targetLine:
                qa_pairs.append([inputLine, targetLine])
    return qa_pairs
  
# Setup FilePath
# Upload the file in data/cornell/ in gooogle drive
datafile = os.path.join(corpus, "formatted_movie_lines.txt")
delimiter = '\t'
delimiter = str(codecs.decode(delimiter, "unicode_escape"))

# Initialize lines dict and conversations dict
lines = {}
conversations = {}
# Load lines and conversations
print("Loading Lines and Conversations from Datatset, saving them locally")
lines, conversations =loadlines(os.path.join(corpus, "utterances.jsonl"))
# Write new csv file
print("\nWriting newly formatted file...")
with open(datafile, 'w', encoding='utf-8') as outputfile:
    writer = csv.writer(outputfile, delimiter=delimiter, lineterminator='\n')
    for pair in extractSentencePairs(conversations):
        writer.writerow(pair)


Loading Lines and Conversations from Datatset, saving them locally

Writing newly formatted file...


In [82]:
#Helper Function class 
  #We will set the length of sentence that we will consider
MAX_LENGTH = 10  # Maximum sentence length to consider
class SetVocab:
  def __init__(self,vocab,corpus, corpus_name, datafile):
    self.vocab = vocab
    self.corpus = corpus
    self.datafile = datafile
    self.corpus_name = corpus_name

  def unicodeToAscii(self,s):
      return ''.join(
          c for c in unicodedata.normalize('NFD', s)
          if unicodedata.category(c) != 'Mn'
      )

  # Lowercase, trim, and remove non-letter characters
  def normalizeString(self,s):
      s = self.unicodeToAscii(s.lower().strip())
      s = re.sub(r"([.!?])", r" \1", s)
      s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
      s = re.sub(r"\s+", r" ", s).strip()
      return s

  # Read query/response pairs and return a voc object
  def readVocs(self):
      print("Reading lines...")
      # Read the file and split into lines
      lines = open(self.datafile, encoding='utf-8').\
          read().strip().split('\n')
      # Split every line into pairs and normalize
      pairs = [[self.normalizeString(s) for s in l.split('\t')] for l in lines]
      voc = self.vocab(self.corpus_name)
      return voc, pairs

  # Returns True iff both sentences in a pair 'p' are under the MAX_LENGTH threshold
  def filterPair(self,p):
      # Input sequences need to preserve the last word for EOS token
      return len(p[0].split(' ')) < MAX_LENGTH and len(p[1].split(' ')) < MAX_LENGTH

  # Filter pairs using filterPair condition
  def filterPairs(self,pairs):
      return [pair for pair in pairs if self.filterPair(pair)]



In [85]:
# Create default tokens, these will be used to pad or signal
#sentence functionalities to the encoder
PAD_token = 0  # Used for padding short sentences
SOS_token = 1  # Start-of-sentence token
EOS_token = 2  # End-of-sentence token

#Build a chatbot vocabulary based on the word corpus that we have 
class ChatbotVocab:
    def __init__(self, name):
        self.name = name
        # we will define later
        self.trimmed = False
        #create dictionaries to store the index and the count for each word in the corpus
        self.maptoindex = {}
        self.maptocount = {}
        #map index to word for faster retrieval
        self.index2word = {PAD_token: "PAD", SOS_token: "SOS", EOS_token: "EOS"}
        self.num_words = 3  # Count SOS, EOS, PAD, consider unique only
        self.trimmed = False

    #any time we pass a sentence to the vocah
    def addSentence(self, sentence):
        for word in sentence.split(' '):
            self.addWord(word)

    #called by the sentence for each word it hass
    def addWord(self, word):
      """
      Check if the word is in the corpus
      If not create new identity for it
      create an index for it, set count
      Increment the number of words in vocab
      """
      if word not in self.maptoindex:

          self.maptoindex[word] = self.num_words
          self.maptocount[word] = 1
          self.index2word[self.num_words] = word
          self.num_words += 1
      else:
          self.maptocount[word] += 1

    # Avoid noise by trimming certain words that are rare
    def trim(self, min_count=3):
        if self.trimmed:
            return
        self.trimmed = True

        keep_words = []

        for k, v in self.maptocount.items():
            if v >= min_count:
                keep_words.append(k)

        print('Keeping only these many words {} / {} = {:.4f}'.format(
            len(keep_words), len(self.maptoindex), len(keep_words) / len(self.maptoindex)
        ))

        # Reset and add all non trimmed words again dictionaries
        self.maptoindex = {}
        self.maptocount = {}
        self.index2word = {PAD_token: "PAD", SOS_token: "SOS", EOS_token: "EOS"}
        self.num_words = 3 # Count default tokens

        for word in keep_words:
            self.addWord(word)


def trimRareWords(voc, pairs, MIN_COUNT):
    # Trim words used under the MIN_COUNT from the voc
    
    voc.trim(MIN_COUNT)
    # Filter out pairs with trimmed words
    keep_pairs = []
    for pair in pairs:
        input_sentence = pair[0]
        output_sentence = pair[1]
        keep_input = True
        keep_output = True
        # Check input sentence
        for word in input_sentence.split(' '):
            if word not in voc.maptoindex:
                keep_input = False
                break
        # Check output sentence
        for word in output_sentence.split(' '):
            if word not in voc.maptoindex:
                keep_output = False
                break

        # Only keep pairs that do not contain trimmed word(s) in their input or output sentence
        if keep_input and keep_output:
            keep_pairs.append(pair)

    print("Trimmed to {} pairs".format(len(keep_pairs)))
    return keep_pairs

# Using the functions defined above, return a populated voc object and pairs list
def loadPrepareData(corpus, corpus_name, datafile, save_dir):
    print("Start preparing training data ...")
    #Create the Vocab Helper Class by 
    setup_voc = SetVocab(ChatbotVocab,corpus, corpus_name,datafile)
    voc, pairs = setup_voc.readVocs()
    pairs = setup_voc.filterPairs(pairs)
    print("Trimmed to {!s} sentence pairs".format(len(pairs)))
    print("Counting words...")
    for pair in pairs:
        voc.addSentence(pair[0])
        voc.addSentence(pair[1])
    MIN_COUNT = 3    # Minimum word count threshold for trimming
    #trim the pairs
    pairs = trimRareWords(voc, pairs,MIN_COUNT)
    return voc, pairs


# Load/Assemble voc and pairs
save_dir = os.path.join("data", "save")
voc, pairs = loadPrepareData(corpus, corpus_name, datafile, save_dir)
print(f'Words in our courpus {voc.num_words}')
print(f'Number of pairs {len(pairs)}')
# Print some pairs to validate
print("\npairs:")
for pair in pairs[:5]:
    print(pair)



Start preparing training data ...
Reading lines...
Trimmed to 64313 sentence pairs
Counting words...
Keeping only these many words 7833 / 18079 = 0.4333
Trimmed to 53131 pairs
Words in our courpus 7836
Number of pairs 53131

pairs:
['they do to !', 'they do not !']
['she okay ?', 'i hope so .']
['wow', 'let s go .']
['what good stuff ?', 'the real you .']
['do you listen to this crap ?', 'what crap ?']


In [96]:
# creating a batch from the sequences
def sent2index(voc, sentence):
    #return the index of each word in the corpus
    return [voc.maptoindex[word] for word in sentence.split(' ')] + [EOS_token]


def Padding(l, fillvalue=PAD_token):
    return list(itertools.zip_longest(*l, fillvalue=fillvalue))

def binaryMask(l, value=PAD_token):
    m = []
    for i, seq in enumerate(l):
        m.append([])
        for token in seq:
            if token == PAD_token:
                m[i].append(0)
            else:
                m[i].append(1)
    return m

# Returns padded input sequence tensor and lengths
def inputBatch(l, voc):
    indexes_batch = [sent2index(voc, sentence) for sentence in l]
    lengths = torch.tensor([len(indexes) for indexes in indexes_batch])
    padList = Padding(indexes_batch)
    padVar = torch.LongTensor(padList)
    return padVar, lengths

# Returns padded target sequence tensor, padding mask, and max target length
def outputBathc(l, voc):
    indexes_batch = [sent2index(voc, sentence) for sentence in l]
    max_target_len = max([len(indexes) for indexes in indexes_batch])
    padList = Padding(indexes_batch)
    mask = binaryMask(padList)
    mask = torch.BoolTensor(mask)
    padVar = torch.LongTensor(padList)
    return padVar, mask, max_target_len

# Returns all items for a given batch of pairs
# Returns all items for a given batch of pairs
def batch2TrainData(voc, pair_batch):
    input_batch, output_batch = [], []
    for pair in pair_batch:
        input_batch.append(pair[0])
        output_batch.append(pair[1])
    inp, lengths = inputBatch(input_batch, voc)
    output, mask, max_target_len = outputBathc(output_batch, voc)
    return inp, lengths, output, mask, max_target_len



In [19]:
# Define Transformer

# Create Custom Dataset and Dataloader

In [97]:
# split the data into trian and validate
validation_set_size = 0.2 
dataset_size = len(pairs)
validation_size = int(validation_set_size * dataset_size)
train_size = dataset_size - validation_size

In [98]:
from torch.utils.data import DataLoader, random_split


import torch
#Create a Dataloader
class Dataset_pairs(torch.utils.data.Dataset):
  'Make a dataset from the already existing pairs'
  def __init__(self, pairs):
        'Initialization'
        self.pairs = pairs

  def __len__(self):
        'Denotes the total number of samples'
        return len(self.pairs)

  def __getitem__(self, index):
        'Generates one sample of data'
        # Select sample

        pair = self.pairs[index]
        # Load data and get label
        

        return pair 

CustomData = Dataset_pairs(pairs)

#The pairs from dataloader will be sent to the collate function, we will 
#get the desired output from here
def collate_fn(batch):
    op = batch2TrainData(voc,batch)
    return op

# Split the dataset into training and validation subsets
train_dataset, val_dataset = random_split(CustomData, [train_size, validation_size])

# Define batch sizes for training and validation
batch_size = 64
# Create data loaders for training and validation
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True,collate_fn=collate_fn)

In [89]:
# training_batches = [batch2TrainData(voc, [random.choice(pairs) for _ in range(batch_size)])
#                   ]
# print('tr')
# print(training_batches)
# print('tn')
# b = [random.choice(pairs) for _ in range(5)]
# print(b)
# i = 0
# for batch in train_loader:
#   print(batch)
#   if(i==0):
#     break
    

# Luong Attention, GRU gates Encoder Decoder

In [100]:
from torch import Tensor
import torch
import torch.nn as nn
from torch.nn import Transformer
import math
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class GRUencoder(nn.Module):
    def __init__(self, hidden_size, embedding, n_layers=1, dropout=0):
        super(GRUencoder, self).__init__()
        self.n_layers = n_layers
        self.hidden_size = hidden_size
        self.embedding = embedding

        # Initialize GRU; the input_size and hidden_size params are both set to 'hidden_size'
        #   because our input size is a word embedding with number of features == hidden_size
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers,
                          dropout=(0 if n_layers == 1 else dropout), bidirectional=True)

    def forward(self, input_seq, input_lengths, hidden=None):
        # Convert word indexes to embeddings
        embedded = self.embedding(input_seq)
        # Pack padded batch of sequences for RNN module
        packed = nn.utils.rnn.pack_padded_sequence(embedded, input_lengths,enforce_sorted=False)
        # Forward pass through GRU
        outputs, hidden = self.gru(packed, hidden)
        # Unpack padding
        outputs, _ = nn.utils.rnn.pad_packed_sequence(outputs)
        # Sum bidirectional GRU outputs
        outputs = outputs[:, :, :self.hidden_size] + outputs[:, : ,self.hidden_size:]
        # Return output and final hidden state
        return outputs, hidden



# Luong attention layer
class Attn(nn.Module):
    def __init__(self, method, hidden_size):
        super(Attn, self).__init__()
        self.hidden_size = hidden_size     
        self.attn = nn.Linear(self.hidden_size, hidden_size)

    def general_score(self, hidden, encoder_output):
        energy = self.attn(encoder_output)
        return torch.sum(hidden * energy, dim=2)

    def forward(self, hidden, encoder_outputs):
        attn_energies = self.general_score(hidden, encoder_outputs)
        attn_weights = attn_energies.t()

        # Return the softmax normalized probability scores (with added dimension)
        return F.softmax(attn_weights, dim=1).unsqueeze(1)



class LuongAttnDecoderGRU(nn.Module):
    def __init__(self, attn_model, embedding, hidden_size, output_size, n_layers=1, dropout=0.1):
        super(LuongAttnDecoderGRU, self).__init__()

        # Keep for reference
        self.attn_model = attn_model
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.dropout = dropout

        # Define layers
        self.embedding = embedding
        self.embedding_dropout = nn.Dropout(dropout)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers, dropout=(0 if n_layers == 1 else dropout))
        self.concat = nn.Linear(hidden_size * 2, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)

        self.attn = Attn(attn_model, hidden_size)

    def forward(self, input_step, last_hidden, encoder_outputs):
        # Get embedding of current input word
        embedded = self.embedding(input_step)
        embedded = self.embedding_dropout(embedded)
        # Forward through unidirectional GRU
        rnn_output, hidden = self.gru(embedded, last_hidden)
        # Calculate attention weights from the current GRU output
        attn_weights = self.attn(rnn_output, encoder_outputs)
        # Multiply attention weights to encoder outputs to get new "weighted sum" context vector
        context = attn_weights.bmm(encoder_outputs.transpose(0, 1))
        rnn_output = rnn_output.squeeze(0)
        context = context.squeeze(1)
        concat_input = torch.cat((rnn_output, context), 1)
        concat_output = torch.tanh(self.concat(concat_input))
        # Predict next word using Luong eq. 6
        output = self.out(concat_output)
        output = F.softmax(output, dim=1)
        # Return output and final hidden state for next work
        return output, hidden

In [91]:
#Create Custom Dataset and Dataloader
pairs[1][1]

'i hope so .'

# Training Pipeline

In [101]:
def maskNLLLoss(inp, target, mask):
    nTotal = mask.sum()
    crossEntropy = -torch.log(torch.gather(inp, 1, target.view(-1, 1)).squeeze(1))
    loss = crossEntropy.masked_select(mask).mean()
    loss = loss.to(device)
    return loss, nTotal.item()


def train(input_variable, lengths, target_variable, mask, max_target_len, encoder, decoder, embedding,encoder_optimizer, decoder_optimizer, batch_size, clip,teacher_forcing_ratio, max_length=MAX_LENGTH):
    encoder.train()
    decoder.train()   #model will train 
    # Zero gradients
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    # Set device options
    input_variable = input_variable.to(device)
    target_variable = target_variable.to(device)
    mask = mask.to(device)
    # Lengths for inputs
    lengths = lengths.to("cpu")

    # Initialize variables
    loss = 0
    batch_loss = []
    n_totals = 0

    # Forward pass through encoder
    encoder_outputs, encoder_hidden = encoder(input_variable, lengths)

    # give ititlal SOS input to the decoder
    decoder_input = torch.LongTensor([[SOS_token for _ in range(batch_size)]])
    decoder_input = decoder_input.to(device)

    # set decoer states
    decoder_hidden = encoder_hidden[:decoder.n_layers]

    # Setup Teacher Forcing Ratio
    TF_flag = True if random.random() < teacher_forcing_ratio else False
    if TF_flag:
        for t in range(max_target_len):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden, encoder_outputs
            )
            # Teacher forcing: next input is current target
            decoder_input = target_variable[t].view(1, -1)
            # Calculate and accumulate loss
            mask_loss, nTotal = maskNLLLoss(decoder_output, target_variable[t], mask[t])
            loss += mask_loss
            batch_loss.append(mask_loss.item() * nTotal)
            n_totals += nTotal
    else:
        for t in range(max_target_len):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden, encoder_outputs
            )
            # No teacher forcing: next input is decoder's own current output
            #decide the top softmax output
            _, topval = decoder_output.topk(1)
            decoder_input = torch.LongTensor([[topval[i][0] for i in range(batch_size)]])
            decoder_input = decoder_input.to(device)
            # Calculate and accumulate loss
            mask_loss, nTotal = maskNLLLoss(decoder_output, target_variable[t], mask[t])
            loss += mask_loss
            batch_loss.append(mask_loss.item() * nTotal)
            n_totals += nTotal

    # Perform backpropatation
    loss.backward()

    # Clip gradients
    _ = nn.utils.clip_grad_norm_(encoder.parameters(), clip)
    _ = nn.utils.clip_grad_norm_(decoder.parameters(), clip)

    # Adjust model weights
    encoder_optimizer.step()
    decoder_optimizer.step()

    return sum(batch_loss) / n_totals


def validate(input_variable, lengths, target_variable, mask, max_target_len, encoder, decoder, embedding,encoder_optimizer, decoder_optimizer, batch_size, clip,teacher_forcing_ratio, max_length=MAX_LENGTH):
    encoder.train()
    decoder.train()
    # Zero gradients
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    # Set device options
    input_variable = input_variable.to(device)
    target_variable = target_variable.to(device)
    mask = mask.to(device)
    # Lengths for rnn packing should always be on the cpu
    lengths = lengths.to("cpu")

    # Initialize variables
    loss = 0
    print_losses = []
    n_totals = 0

    # Forward pass through encoder
    encoder_outputs, encoder_hidden = encoder(input_variable, lengths)

    # Create initial decoder input (start with SOS tokens for each sentence)
    decoder_input = torch.LongTensor([[SOS_token for _ in range(batch_size)]])
    decoder_input = decoder_input.to(device)

    # Set initial decoder hidden state to the encoder's final hidden state
    decoder_hidden = encoder_hidden[:decoder.n_layers]

    # Determine if we are using teacher forcing this iteration
    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

    # Forward batch of sequences through decoder one time step at a time
    if use_teacher_forcing:
        for t in range(max_target_len):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden, encoder_outputs
            )
            # Teacher forcing: next input is current target
            decoder_input = target_variable[t].view(1, -1)
            # Calculate and accumulate loss
            mask_loss, nTotal = maskNLLLoss(decoder_output, target_variable[t], mask[t])
            loss += mask_loss
            print_losses.append(mask_loss.item() * nTotal)
            n_totals += nTotal
    else:
        for t in range(max_target_len):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden, encoder_outputs
            )
            #decide the top softmax output
            _, topval = decoder_output.topk(1)
            decoder_input = torch.LongTensor([[topval[i][0] for i in range(batch_size)]])
            decoder_input = decoder_input.to(device)
            # Calculate and accumulate loss
            mask_loss, nTotal = maskNLLLoss(decoder_output, target_variable[t], mask[t])
            loss += mask_loss
            print_losses.append(mask_loss.item() * nTotal)
            n_totals += nTotal

          ##No backprop here 

    return sum(print_losses) / n_totals


# Define Training

In [103]:
# Define Parameters
# Configure training/optimization

#Setup Parameter Based on Best Result Observed in WandB
clip = 25.0
teacher_forcing_ratio = 1.0
learning_rate = 0.00025
decoder_learning_ratio = 3.0
n_iteration = 4000
print_every = 1
save_every = 100
MAX_LENGTH = 10

attn_model = 'general'
hidden_size = 500
encoder_n_layers = 2
decoder_n_layers = 2
dropout = 0.1
batch_size = 64

# build models
embedding = nn.Embedding(voc.num_words, hidden_size)
encoder = GRUencoder(hidden_size, embedding, encoder_n_layers, dropout)
decoder = LuongAttnDecoderGRU(attn_model, embedding, hidden_size, voc.num_words, decoder_n_layers, dropout)
# Use appropriate device
encoder = encoder.to(device)
decoder = decoder.to(device)
print('models created')
loss_fn = torch.nn.CrossEntropyLoss(ignore_index=PAD_token)

# Initialize optimizers
print('Change optimizers here')
encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate * decoder_learning_ratio)

# Ensure dropout layers are in train mode
encoder.train()
decoder.train()



models created
Change optimizers here


LuongAttnDecoderGRU(
  (embedding): Embedding(7836, 500)
  (embedding_dropout): Dropout(p=0.1, inplace=False)
  (gru): GRU(500, 500, num_layers=2, dropout=0.1)
  (concat): Linear(in_features=1000, out_features=500, bias=True)
  (out): Linear(in_features=500, out_features=7836, bias=True)
  (attn): Attn(
    (attn): Linear(in_features=500, out_features=500, bias=True)
  )
)

In [94]:
# training_batches = [batch2TrainData(voc, [random.choice(pairs) for _ in range(batch_size)])
#                   ]
# print('tr')
# print(training_batches)
# print('tn')
# b = [random.choice(pairs) for _ in range(5)]
# print(b)
# i = 0
# for batch in train_loader:
#   print(batch)
#   if(i==0):
#     break

In [107]:
#Define Epochs/
total_epochs = 1
batch_size = 64
for epoch in range(1,total_epochs+1):
  # training_batches = [batch2TrainData(voc, [random.choice(pairs) for _ in range(batch_size)])
  #                   for _ in range(n_iteration)]
  
  #train
  print_mul = 1
  print_counter = 0
  for i,batch_pair in enumerate(train_loader):    
    input_variable, lengths, target_variable, mask, max_target_len = batch_pair
    # Run a training iteration with batch
    loss = train(input_variable, lengths, target_variable, mask, max_target_len, encoder,
    
                  decoder, embedding, encoder_optimizer, decoder_optimizer, batch_size, clip,teacher_forcing_ratio)
    print_counter+=1
    print_mul+=1
    if(print_counter == 5):
      print("Epoch: {}| iterations complete: {} | loss at this iteration {}".format(epoch, print_mul*print_counter,loss))
      print_counter = 0
  print('Epoch Training done, Now validating')
  #validate
  for batch_pair in val_loader:
    
    val_loss = 0
    #keep input_variable and variable lenght
    # train_batch = training_batches[iter-1
    tot_loss = 0
    
    #keep input_variable and variable lenght
    # train_batch = training_batches[iter-1

    input_variable, lengths, target_variable, mask, max_target_len = batch_pair
    # Run a training iteration with batch
    loss = validate(input_variable, lengths, target_variable, mask, max_target_len, encoder,
                  decoder, embedding, encoder_optimizer, decoder_optimizer, batch_size, clip,teacher_forcing_ratio)
    val_loss += loss
    
  print("Epoch: {}| Percent complete: {:.1f}%| Average loss: {:.4f}| Val loss: {:.4f}".format(epoch, epoch / total_epochs * 100,tot_loss,val_loss))


Epoch: 1| iterations complete: 30 | loss at this iteration 4.4050077812984965
Epoch: 1| iterations complete: 55 | loss at this iteration 4.368306592970111
Epoch: 1| iterations complete: 80 | loss at this iteration 4.1428665204581865
Epoch: 1| iterations complete: 105 | loss at this iteration 4.070235240951325
Epoch: 1| iterations complete: 130 | loss at this iteration 4.194320168243206
Epoch: 1| iterations complete: 155 | loss at this iteration 4.0713642584136025
Epoch: 1| iterations complete: 180 | loss at this iteration 4.214334791670193
Epoch: 1| iterations complete: 205 | loss at this iteration 3.9103517552847586
Epoch: 1| iterations complete: 230 | loss at this iteration 4.207560038185233
Epoch: 1| iterations complete: 255 | loss at this iteration 3.890459941358375
Epoch: 1| iterations complete: 280 | loss at this iteration 4.03373944519823
Epoch: 1| iterations complete: 305 | loss at this iteration 4.363158554621721
Epoch: 1| iterations complete: 330 | loss at this iteration 4.14

RuntimeError: ignored

In [None]:
import torch
from google.colab import drive
drive.mount('/content/gdrive')
# Save the entire model
torch.save(encoder.state_dict, 'encoder_lu_gru_model.pth')
torch.save(decoder.state_dict, 'decoder_lu_gru_model.pth')
enc_path = os.path.join("/content/gdrive/My Drive/data/model", 'encoder_lu_gru_model.pth')
dec_path = os.path.join("/content/gdrive/My Drive/data/model", 'decoder_lu_gru_model.pth')
enc_path_state= os.path.join("/content/gdrive/My Drive/data/model_dict", 'encoder_lu_gru_model.pth')
dec_path_state= os.path.join("/content/gdrive/My Drive/data/model_dict", 'decoder_lu_gru_model.pth')
# Save only the model state dictionary
torch.save(encoder.state_dict(), enc_path_state)
torch.save(encoder,enc_path )
torch.save(decoder.state_dict(), dec_path_state)
torch.save(decoder,dec_path )
