In [1]:
import torch
import torch.nn as nn # the neural network package that contains functions for creating the neural network layers
import torch.nn.functional as F
import torch.optim as optim # a package that allows use to use an optimizer in order to update the parameters during training
from torch.utils.data import DataLoader # allows use to process the data in batches
from torch.nn.utils.rnn import pad_sequence # a function that zero-pads the sentences so they can have equal size in a batch

import pickle
torch.manual_seed(0) # set a random seed for reproducibility
from tqdm import tqdm

In [2]:
batch_size = 8
is_preprocessing_LSTM = False #This toggle preprocessing for LSTM model (not preprocessing for text)

device = torch.device('cuda:0')
#device = torch.device('cpu')
torch.cuda.empty_cache()

###  Sentence boundaries

When dealing with language, it is good to know when a sentence starts and when it ends. That will help the model at the beginning of the prediction, when we don't have any previous words as context. For that purpose, we are going to pad each sentence with a start-of-sentence symbol _"&lt;s>"_ and an end-of-sentence symbol _"&lt;/s>"_. 

Since you already did a similar thing in the n-grams exercise, this function is already implemented for you.

In [3]:
def add_sentence_boundaries(data):
    """
    Takes the data, where each line is a sentence, appends <s> token at the beginning and </s> at the end of each sentence
    Example input: I live in Helsinki
    Example output: <s> I live in Helsinki </s>
    
    Arguments
    ---------
    data : list
            a list of sentences
    
    Returns
    -------
    res : list
            a list of sentences, where each sentence has <s> at the beginning and </s> at the end
    """
    res = []
    for sent in data:
        sent = '<s> ' + sent.rstrip() + ' </s>'
        res.append(sent)
    
    return res

### Index dictionaries <a class="anchor" id="task_1_1"></a> 
Neural networks can't process words as raw strings. Due to that, we need to represent the words with numbers. The first step in doing that is creating two dictionaries: word2idx and idx2word.

The word2idx dictionary contains unique words as keys and unique indices for each of the words as values. <br>
The idx2word dictionary contains unique indices as keys and unique words for each of those indices as values. It is essentially a reversed word2dx, where the keys are the values and the values are the keys.

Example sentences: ["I look forward", "You look forward"] <br>
word2idx = {"I": 1, "look": 2, "forward": 3, "You": 4} <br>
idx2word = {1: "I", 2: "look", 3: "forward", 4: "You"} <br>

Write a function that creates two dictionaries: word2idx and idx2work. The dictionaries should contain all the unique words in the data. <b>The indices should start from 1 and not from 0<b>

In [4]:
def create_indices(data):
    """
    This function creates two dictionaries: word2idx and idx2word, containing each unique word in the dataset
    and its corresponding index.
    Remember that the starting index should be 1 and not 0
    
    Arguments
    ---------
    data - list
            a list of sentences, where each sentence starts with <s>
            and ends with </s> token
    
    Returns
    -------
    word2idx - dictionary
                a dictionary, where the keys are the words and the values are the indices
                
    idx2word - dictionary
                a dictionary, where the keys are the indices and the values are the words
    """
    
    # YOUR CODE HERE
    #raise NotImplementedError()
    word2idx = dict()
    idx2word = dict()
    
    data_list = ''
    for sentence in data:
        data_list = data_list + ' ' + sentence

    data_list = data_list[1:]
    data_split = data_list.split(' ')
    data_unique = []
    for word in data_split:
        if word not in data_unique:
            data_unique.append(word)
            if word not in word2idx.keys():
                 word2idx[word] = data_unique.index(word)+1
    
    for key, value in word2idx.items():
        idx2word[value] = key
    
    return word2idx, idx2word

### Index data <a class="anchor" id="task_1_2"></a>
After we have created the word2idx and idx2word dictionaries, it is time to index the data. In other words, we need to replace each word in the data with its corresponding index.

Write a function that reads each sentence from the data and replaces each word in the sentence with its index from the word2idx dictionary.

In [5]:
def index_data(data, word2idx):
    """
    This function replaces each word in the data with its corresponding index
    
    Arguments
    ---------
    data - list
            a list of sentences, where each sentence starts with <s>
            and ends with </s> token
    
    word2idx - dict
            a dictionary where the keys are the unique words in the data
            and the values are the unique indices corresponding to the words%
    
    Returns
    -------
    data_indexed - list
                a list of sentences, where each word in the sentence is replaced with its index
    """
    
    data_indexed = []
    # YOUR CODE HERE
    #raise NotImplementedError()
    
    for sentence in data:
        sentence_index = []
        for word in sentence.split(' '):
            sentence_index.append(word2idx[word])
        data_indexed.append(sentence_index)
    

    return data_indexed

### Convert sentences to tensors

This function converts each indexed sentence to a LongTensor data type. This is required in order to process it later using Pytorch.

You don't have to modify this function. It is already implemented for you.

In [6]:
def convert_to_tensor(data_indexed):
    """
    This function converts the indexed sentences to LongTensors
    
    Arguments
    ---------
    data_indexed - list
            a list of sentences, where each word in the sentence
            is replaced by its index
    
    Returns
    -------
    tensor_array - list
                a list of sentences, where each sentence
                is a LongTensor
    """
    
    tensor_array = []
    for sent in data_indexed:
        tensor_array.append(torch.LongTensor(sent))    
        
    return tensor_array

### Combine features and labels in a tuple

This function combines each indexed sentence and its corresponding labels to a tuple. This will be beneficial for us when we zero-pad the data later, in order to make the batches have equal-length samples.

You don't have to modify this function. It is already implemented for you.

In [7]:
def combine_data(input_data, labels_data):
    """
    This function converts the input features and the labels into tuples
    where each tuple corresponds to one sentence in the format (features, labels)
    
    Arguments
    ---------
    input_data - list
            a list of tensors containing the training features
    
    labels_data - list
            a list of tensors containing the training labels
    
    Returns
    -------
    res - list
            a list of tuples, where each tuple corresponds to one sentece pair
            in the format (features, labels)
    """
    
    res = []
    
    for i in range(len(input_data)):
        res.append((input_data[i], labels_data[i]))

    return res

### Remove extra data

Since we will be processing the data in equal batches during training, we need to make sure that each batch has equal number of sentences. In case the last batch contains less sentences than the batch size, that batch will be discarded.

This function discards the extra data that doesn't fit in a batch.

You don't have to modify this function. It is already implemented for you.

In [8]:
def remove_extra(data, batch_size):
    """
    This function removes the extra data that does not fit in a batch   
    
    Arguments
    ---------
    data - list
            a list of tuples, where each tuple corresponds to a
            sentence in a format (features, labels)
            
    batch_size - integer
                    the size of the batch
    
    
    Returns
    -------
    data - list
            a list of tuples, where each tuple corresponds to a
            sentence in a format (features, labels)
    """
    
    extra = len(data) % batch_size
    if extra != 0:
        data = data[:-extra][:]

    return data

### Zero-pad the data

In order to process the data in batches, we need to make sure that the sentences in each batch have equal lengths. Since we are working with sentences, each sentence in a batch can have different number of words. In this case, we need to  make the length of each sentence the same as the length of the longest sentence in that batch. We do that by adding zeros at the end of each sentence, until the sentence has equal length as the longest one in the batch.

This function implements the zero-padding.

You don't have to modify this function. It is already implemented for you.

In [9]:
def collate(list_of_samples):
    """
    This function zero-pads the training data in order to process the sentences
    in a batch during training
    
    Arguments
    ---------
    list_of_samples - list
                        a list of tuples, where each tuple corresponds to a
                        sentence in a format (features, labels)
    
    
    Returns
    -------
    pad_input_data - tensor
                        a tensor of input features equal to the batch size,
                        where features are zero-padded to have equal lengths
                        
    input_data_lengths - list
                        a list where each element is the length of the 
                        corresponding sentence
    
    pad_labels_data - tensor
                        a tensor of labels equal to the batch size,
                        where labels are zero-padded to have equal lengths
            
    """
    
    
    list_of_samples.sort(key=lambda x: len(x[0]), reverse=True)
    input_data, labels_data = zip(*list_of_samples)

    input_data_lengths = [len(seq) for seq in input_data]
    
    padding_value = 0

    # pad input
    pad_input_data = pad_sequence(input_data, padding_value=padding_value)
    
    # pad labels
    pad_labels_data = pad_sequence(labels_data, padding_value=padding_value)

    return pad_input_data, input_data_lengths, pad_labels_data

### Prepare features and labels <a class="anchor" id="task_1_3"></a> 
During training, the model takes an input word and outputs a prediction. We will need to compare this prediction to 'true label'. True label is just the next word in the text, but we will need to organize the data, so that every word in the text is considered as this 'true label'.

In the label sentence, every word is moved a step in time, and for the input sentence the last word is missing. 

Example sentence: oops i did it again <br>
INPUT: oops i did it <br>
LABEL: i did it again

Note: the first word in the sentence is start-of-sentence symbol and the last one is end-of-sentence symbol.

Write a function that takes as input the indexed data and returns two arrays: the input array where the last word from each sentence is missing, and the label array, where every word is moved a step in time.

In [10]:
def prepare_for_training(data_indexed):
    """
    This function creates the input features and their corresponding labels
    
    Arguments
    ---------
    data_indexed - list
            a list of sentences, where each word in the sentence
            is replaced by its index
    
    
    Returns
    -------
    input_data - list
            a list of indexed sentences, where the last element of each sentence is removed
            
    labels_data - list
            a list of indexed sentences, where the first element of each sentence is removed
    """
    
    input_data = []
    labels_data = []

     # YOUR CODE HERE
    #raise NotImplementedError()
    for data in data_indexed:    
        input_data.append(data[:-1])
        labels_data.append(data[1:])
    
    return input_data, labels_data

### Preprocess data <a class="anchor" id="task_1_4"></a>
At this point, we have all the necessary functions to prepare the data for training. What is left to do is to run them one by one and get the data in the desired format.

Write a function that takes the data and prepares it for training. You need to do the following steps:

    1. Add sentence boundaries
    2. Create index dictionaries (word2idx and idx2word)
    3. Index the data in a way that each word is replaced by its index
    4. Convert the indexed data to a list of tensors, where each tensor is a sentence
    5. Split each sentence to input and labels

In [11]:
def preprocess_data(data):
    """
    This function runs the whole preprocessing pipeline and returns the prepared
    input features and labels, along with the word2idx and idx2word dictionaries
    
    Arguments
    ---------
    data - list
            a list of sentences that need to be prepared for training
    
    
    Returns
    -------
    input_data - list
            a list of tensors, where each tensor is an indexed sentence used as input feature
            
    labels_data - list
            a list of tensors, where each tensor is an indexed sentence used as a true label
    
    word2idx - dictionary
                a dictionary, where the keys are the words and the values are the indices
                
    idx2word - dictionary
                a dictionary, where the keys are the indices and the values are the words
    """
    
    # YOUR CODE HERE
    #raise NotImplementedError()
    #1. Add sentence boundaries    
    res = add_sentence_boundaries(data)
    
    #2. Create index dictionaries (word2idx and idx2word)
    word2idx, idx2word = create_indices(res)    
    
    #3. Index the data in a way that each word is replaced by its index
    indexed_data = index_data(res, word2idx)
    
    #4. Convert the indexed data to a list of tensors, where each tensor is a sentence
    tensor_array = convert_to_tensor(indexed_data)    
    
    #5. Split each sentence to input and labels
    input_data, labels_data = prepare_for_training(tensor_array)
    
    return input_data, labels_data, word2idx, idx2word

In [12]:
# Load data
# Load result:
with open("data_sample.txt", "rb") as fp:   # Unpickling
    sentences = pickle.load(fp)

print(sentences[22:35])

['We know you love Chewy.', "We know you're here.", "We know you know the Chewy-RyanCohen-GameStop connection, but it wasn't real enough for you yet.", "Well, I don't have to tell you, because you're not stupid, but I will anyway:  it's gotten really real enough for you now.", 'The Chewy executive triumvirate joining the GameStop board of directors is your signal, friend.', 'You may start pumping GME to your boomer audience.', 'Now.', "I don't believe reddit has been too kind to you in the past, but worry not, follow through with this and you'll have lots of friends here and we'll have your back forever.", 'Well, definitely not forever, but at least for a while.', 'What better time to start than today?', 'With love, brother.', 'P.S.', "- don't be afraid to use the rocket 🚀, it feels good."]


In [13]:
if is_preprocessing_LSTM:
    train_input, train_labels, word2idx, idx2word = preprocess_data(sentences) # run the preprocessing pipeline
    train_data = combine_data(train_input, train_labels)
    train_data = remove_extra(train_data, batch_size)
    torch.save(train_input, "LSTM_data/train_input.pt")
    torch.save(train_labels, "LSTM_data/train_labels.pt")
    torch.save(word2idx, "LSTM_data/word2idx.pt")
    torch.save(idx2word, "LSTM_data/idx2word.pt")
    torch.save(train_data, "LSTM_data/train_data.pt")
else:
    train_input = torch.load('LSTM_data/train_input.pt')
    train_labels = torch.load('LSTM_data/train_labels.pt')
    word2idx = torch.load('LSTM_data/word2idx.pt')
    idx2word = torch.load('LSTM_data/idx2word.pt')
    train_data = combine_data(train_input, train_labels)
    train_data = remove_extra(train_data, batch_size)
    #train_data = torch.load('LSTM_data/train_data.pt')

In [14]:
pairs_batch_train = DataLoader(dataset=train_data,
                    batch_size=batch_size,
                    shuffle=True,
                    collate_fn=collate,
                    pin_memory=True)

In [15]:
class RNN(nn.Module):
    def __init__(self, word2idx, embed_dim, context_dim, num_layers):
        """
        This function initializes the layers of the model
        
        Arguments
        ---------
        word2idx - dictionary
                    a dictionary where the keys are the unique words in the data
                    and the values are the unique indices corresponding to the words
        
        embed_dim - integer
                        the size of the word embeddings

        context_dim - integer
                        the dimension of the hidden size
                        
        num_layers - integer
                        the number of layers in the GRU cell
        """
        super(RNN, self).__init__()
        self.word2idx = word2idx
        self.embed_dim = embed_dim
        self.context_dim = context_dim
        self.num_layers = num_layers
        
        # here we initialise weighs of a model
        self.word_embed = nn.Embedding(len(self.word2idx)+1, self.embed_dim) # embedding layer
        self.gru = nn.GRU(self.embed_dim, self.context_dim, num_layers=self.num_layers) # GRU cell        
        self.dropout = nn.Dropout(0.1) # Dropout        
        self.out = nn.Linear(self.context_dim, len(self.word2idx)+1) # output layer

    
    def forward(self, word, hidden):
        """
        This function implements the forward pass of the model
        
        Arguments
        ---------
        word - tensor
                a tensor containing indices of the words in a batch
                
        hidden - tensor
                    the previous hidden state of the GRU model
        
        Returns
        -------
        output - tensor
                    a tensor of logits from the linear transformation
        
        hidden - tensor
                    the current hidden state of the GRU model
        """ 
        
        # YOUR CODE HERE
        #raise NotImplementedError()
        #1. Replace the indexed word with its embedding vector. 
        #In other words, pass it through the embedding layer
        embeds = self.word_embed(word)
        
        batch_size = word.shape[0]
        #print(batch_size)
        #2. Reshape the embedding vector to a shape of (1, batch_size)
        embeds = embeds.reshape(1, batch_size, self.embed_dim)
        
        #3. Pass the embedding through the GRU cell to get the output 
        #and the hidden tensors. The GRU function takes as input the 
        #work embedding and the previous hidden state.
        output, hidden = self.gru(embeds, hidden)
        
        #4. Addpy a dropout to the output of the GRU
        output = self.dropout(output)
        
        #5. Apply the linear transformation to the output of the dropout layer.
        output = self.out(output)
        
        #6. Reshape the output to have a shape (batch_size, vocab_length+1)
        output = output.reshape(batch_size, len(self.word2idx) + 1)
        
        #7. Return the output of the linear transformation and the hidden tensor
        hidden = hidden.to(device)
        return output, hidden

In [16]:
n_epochs = 150 # the number of epochs to train
embed_dim = 300 # the size of the embedding
hidden_size = 450 # the size of the hidden state
num_layers = 1 # the number of layers in the GRU cell
rnn_model = RNN(word2idx, embed_dim, hidden_size, num_layers).to(device) # initialize the RNN model
loss_function = nn.CrossEntropyLoss(ignore_index=0) # define the loss function
rnn_optimizer = optim.Adam(rnn_model.parameters(), lr=0.001) # define the optimizer

In [17]:
def train_rnn(pairs_batch_train, rnn_model, hidden_size, num_layers, loss_function, rnn_optimizer, n_epochs):
    """
    This function implements the training of the model

    Arguments
    ---------
    pairs_batch_train - object
                            a DataLoader object that contains the batched data

    rnn_model - object
                an RNN object that contains the initialized model
                
    hidden_size - integer
                    the size of the hidden layer (the context size)
    
    num_layers - integer
                        the number of layers in the GRU cell

    loss_function - object
                        the CrossEntropy loss function

    rnn_optimizer - object
                        an Adam object of the optimizer class

    n_epochs - integer
                the number of epochs to train
    """ 

    for epoch in tqdm(range(n_epochs)): # iterate over the epochs
        epoch_loss = 0
        rnn_model.train() # put the model in training mode
        
        for iteration, batch in enumerate(pairs_batch_train): # at each step take a batch of sentences
            sent_loss = 0
            rnn_optimizer.zero_grad() # clear gradients
            
            train_input, train_input_lengths, train_labels = batch # extract the data from the batch
            train_input = train_input.to(device)
            #train_input_lengths = train_input_lengths.to(device) #this is a list
            train_labels = train_labels.to(device)
            
            hidden = torch.zeros((num_layers, train_input.size(1), hidden_size)) # initialize the hidden state
            
            hidden = hidden.to(device)
            
            for i in range(train_input.size(0)): # iterate over the word in the sentence
                output, hidden = rnn_model(train_input[i], hidden) # forward pass               
                    
                labels = torch.LongTensor(train_labels.size(1)) # define a random tensor with batch_size as number of elements
                labels = labels.to(device)
                labels[:] = train_labels[i][:] # put the correct label values in the tensor
                
                sent_loss += loss_function(output, labels) # compute the loss, compare the predictions and the labels

            sent_loss.backward() # compute the backward pass
            rnn_optimizer.step() # update the parameters

            epoch_loss += sent_loss
            
        print('Epoch: {}   Loss: {}'.format(epoch+1, epoch_loss / len(pairs_batch_train))) # print the loss at each epoch
        filename = "RNN_00005" + str(epoch+1) + '.pt'
        torch.save(rnn_model.state_dict(), filename)

In [18]:
train_rnn(pairs_batch_train, rnn_model,hidden_size, num_layers, loss_function, rnn_optimizer, n_epochs)

  1%|▌                                                                             | 1/150 [10:17<25:33:40, 617.59s/it]

Epoch: 1   Loss: 280.58489990234375


  1%|█                                                                             | 2/150 [20:46<25:39:11, 624.00s/it]

Epoch: 2   Loss: 225.30166625976562


  2%|█▌                                                                            | 3/150 [31:05<25:23:38, 621.89s/it]

Epoch: 3   Loss: 168.77413940429688


  3%|██                                                                            | 4/150 [41:54<25:39:35, 632.71s/it]

Epoch: 4   Loss: 124.60802459716797


  3%|██▌                                                                           | 5/150 [53:13<26:09:07, 649.29s/it]

Epoch: 5   Loss: 99.58843231201172


  4%|███                                                                         | 6/150 [1:04:14<26:07:58, 653.33s/it]

Epoch: 6   Loss: 85.2634048461914


  5%|███▌                                                                        | 7/150 [1:14:24<25:23:12, 639.11s/it]

Epoch: 7   Loss: 75.74676513671875


  5%|████                                                                        | 8/150 [1:24:44<24:58:24, 633.13s/it]

Epoch: 8   Loss: 68.05842590332031


  6%|████▌                                                                       | 9/150 [1:36:00<25:19:10, 646.46s/it]

Epoch: 9   Loss: 61.71651077270508


  7%|█████                                                                      | 10/150 [1:47:05<25:21:57, 652.27s/it]

Epoch: 10   Loss: 58.01473617553711


  7%|█████▌                                                                     | 11/150 [1:57:19<24:43:31, 640.37s/it]

Epoch: 11   Loss: 54.75751876831055


  8%|██████                                                                     | 12/150 [2:07:22<24:06:45, 629.03s/it]

Epoch: 12   Loss: 52.01997375488281


  9%|██████▌                                                                    | 13/150 [2:17:44<23:51:44, 627.04s/it]

Epoch: 13   Loss: 49.96446228027344


  9%|███████                                                                    | 14/150 [2:28:20<23:46:55, 629.53s/it]

Epoch: 14   Loss: 48.16546630859375


 10%|███████▌                                                                   | 15/150 [2:38:15<23:13:13, 619.21s/it]

Epoch: 15   Loss: 47.073646545410156


 11%|████████                                                                   | 16/150 [2:48:20<22:53:12, 614.87s/it]

Epoch: 16   Loss: 46.673465728759766


 11%|████████▌                                                                  | 17/150 [2:58:16<22:30:49, 609.40s/it]

Epoch: 17   Loss: 45.77009201049805


 12%|█████████                                                                  | 18/150 [3:08:12<22:11:24, 605.18s/it]

Epoch: 18   Loss: 45.391292572021484


 13%|█████████▌                                                                 | 19/150 [3:18:01<21:51:08, 600.52s/it]

Epoch: 19   Loss: 45.047569274902344


 13%|██████████                                                                 | 20/150 [3:27:49<21:33:00, 596.77s/it]

Epoch: 20   Loss: 44.887874603271484


 14%|██████████▌                                                                | 21/150 [3:37:38<21:18:07, 594.47s/it]

Epoch: 21   Loss: 45.310333251953125


 15%|███████████                                                                | 22/150 [3:47:26<21:03:39, 592.34s/it]

Epoch: 22   Loss: 45.464622497558594


 15%|███████████▌                                                               | 23/150 [3:57:15<20:51:35, 591.30s/it]

Epoch: 23   Loss: 46.571075439453125


 16%|████████████                                                               | 24/150 [4:07:06<20:41:48, 591.34s/it]

Epoch: 24   Loss: 47.21337127685547


 17%|████████████▌                                                              | 25/150 [4:16:56<20:31:15, 591.00s/it]

Epoch: 25   Loss: 47.92580032348633


 17%|█████████████                                                              | 26/150 [4:26:45<20:19:47, 590.23s/it]

Epoch: 26   Loss: 48.28879165649414


 18%|█████████████▌                                                             | 27/150 [4:36:33<20:08:48, 589.67s/it]

Epoch: 27   Loss: 49.132362365722656


 19%|██████████████                                                             | 28/150 [4:46:23<19:58:51, 589.60s/it]

Epoch: 28   Loss: 50.625755310058594


 19%|██████████████▌                                                            | 29/150 [4:56:11<19:48:34, 589.38s/it]

Epoch: 29   Loss: 55.365055084228516


 20%|███████████████                                                            | 30/150 [5:06:01<19:38:33, 589.28s/it]

Epoch: 30   Loss: 51.389801025390625


 21%|███████████████▌                                                           | 31/150 [5:15:48<19:27:33, 588.68s/it]

Epoch: 31   Loss: 52.08329772949219


 21%|████████████████                                                           | 32/150 [5:25:38<19:18:24, 589.02s/it]

Epoch: 32   Loss: 57.5995979309082


 22%|████████████████▌                                                          | 33/150 [5:35:27<19:08:55, 589.19s/it]

Epoch: 33   Loss: 52.019981384277344


 23%|█████████████████                                                          | 34/150 [5:45:13<18:57:25, 588.32s/it]

Epoch: 34   Loss: 51.36544418334961


 23%|█████████████████▌                                                         | 35/150 [5:55:01<18:46:55, 587.96s/it]

Epoch: 35   Loss: 53.97654724121094


 24%|██████████████████                                                         | 36/150 [6:04:49<18:37:06, 587.96s/it]

Epoch: 36   Loss: 60.09857177734375


 25%|██████████████████▌                                                        | 37/150 [6:14:38<18:28:18, 588.49s/it]

Epoch: 37   Loss: 73.1925277709961


 25%|███████████████████                                                        | 38/150 [6:24:27<18:18:33, 588.51s/it]

Epoch: 38   Loss: 71.34232330322266


 26%|███████████████████▌                                                       | 39/150 [6:34:16<18:08:53, 588.59s/it]

Epoch: 39   Loss: 70.45771026611328


 27%|████████████████████                                                       | 40/150 [6:44:03<17:58:09, 588.08s/it]

Epoch: 40   Loss: 71.49202728271484


 27%|████████████████████▌                                                      | 41/150 [6:53:50<17:48:02, 587.91s/it]

Epoch: 41   Loss: 70.97381591796875


 28%|█████████████████████                                                      | 42/150 [7:03:38<17:38:14, 587.91s/it]

Epoch: 42   Loss: 74.9870376586914


 29%|█████████████████████▌                                                     | 43/150 [7:13:27<17:29:03, 588.25s/it]

Epoch: 43   Loss: 71.26129913330078


 29%|██████████████████████                                                     | 44/150 [7:23:17<17:20:07, 588.75s/it]

Epoch: 44   Loss: 66.70741271972656


 30%|██████████████████████▌                                                    | 45/150 [7:33:03<17:09:02, 588.02s/it]

Epoch: 45   Loss: 65.69152069091797


 31%|███████████████████████                                                    | 46/150 [7:42:54<17:00:24, 588.70s/it]

Epoch: 46   Loss: 68.58171844482422


 31%|███████████████████████▌                                                   | 47/150 [7:52:41<16:50:13, 588.48s/it]

Epoch: 47   Loss: 77.31651306152344


 32%|████████████████████████                                                   | 48/150 [8:02:30<16:40:27, 588.50s/it]

Epoch: 48   Loss: 80.95665740966797


 33%|████████████████████████▌                                                  | 49/150 [8:12:18<16:30:37, 588.49s/it]

Epoch: 49   Loss: 95.46806335449219


 33%|█████████████████████████                                                  | 50/150 [8:22:08<16:21:13, 588.74s/it]

Epoch: 50   Loss: 86.12346649169922


 34%|█████████████████████████▌                                                 | 51/150 [8:31:57<16:11:46, 588.95s/it]

Epoch: 51   Loss: 87.65180206298828


 35%|██████████████████████████                                                 | 52/150 [8:41:46<16:02:04, 589.02s/it]

Epoch: 52   Loss: 87.52787017822266


 35%|██████████████████████████▌                                                | 53/150 [8:51:35<15:52:02, 588.89s/it]

Epoch: 53   Loss: 86.48388671875


 36%|███████████████████████████                                                | 54/150 [9:01:25<15:42:53, 589.31s/it]

Epoch: 54   Loss: 86.64820861816406


 37%|███████████████████████████▍                                               | 55/150 [9:11:16<15:33:29, 589.58s/it]

Epoch: 55   Loss: 95.76700592041016


 37%|████████████████████████████                                               | 56/150 [9:21:04<15:23:20, 589.37s/it]

Epoch: 56   Loss: 97.14685821533203


 38%|████████████████████████████▌                                              | 57/150 [9:30:54<15:13:41, 589.47s/it]

Epoch: 57   Loss: 99.8570785522461


 39%|█████████████████████████████                                              | 58/150 [9:40:39<15:01:58, 588.24s/it]

Epoch: 58   Loss: 99.42066192626953


 39%|█████████████████████████████▌                                             | 59/150 [9:50:28<14:52:23, 588.39s/it]

Epoch: 59   Loss: 100.92277526855469


 40%|█████████████████████████████▌                                            | 60/150 [10:00:14<14:41:34, 587.72s/it]

Epoch: 60   Loss: 97.43387603759766


 41%|██████████████████████████████                                            | 61/150 [10:10:08<14:34:12, 589.35s/it]

Epoch: 61   Loss: 96.98050689697266


 41%|██████████████████████████████▌                                           | 62/150 [10:19:58<14:24:49, 589.65s/it]

Epoch: 62   Loss: 95.04287719726562


 42%|███████████████████████████████                                           | 63/150 [10:29:46<14:14:27, 589.28s/it]

Epoch: 63   Loss: 92.36700439453125


 43%|███████████████████████████████▌                                          | 64/150 [10:39:37<14:05:22, 589.79s/it]

Epoch: 64   Loss: 89.00550842285156


 43%|████████████████████████████████                                          | 65/150 [10:49:26<13:55:07, 589.50s/it]

Epoch: 65   Loss: 87.59112548828125


 44%|████████████████████████████████▌                                         | 66/150 [10:59:14<13:44:30, 588.93s/it]

Epoch: 66   Loss: 96.00997924804688


 45%|█████████████████████████████████                                         | 67/150 [11:09:05<13:35:35, 589.58s/it]

Epoch: 67   Loss: 99.48390197753906


 45%|█████████████████████████████████▌                                        | 68/150 [11:18:55<13:25:53, 589.68s/it]

Epoch: 68   Loss: 92.53302764892578


 46%|██████████████████████████████████                                        | 69/150 [11:28:42<13:15:12, 589.04s/it]

Epoch: 69   Loss: 90.38249969482422


 47%|██████████████████████████████████▌                                       | 70/150 [11:38:32<13:05:39, 589.25s/it]

Epoch: 70   Loss: 93.33223724365234


 47%|███████████████████████████████████                                       | 71/150 [11:48:19<12:54:47, 588.45s/it]

Epoch: 71   Loss: 91.8568115234375


 48%|███████████████████████████████████▌                                      | 72/150 [11:58:06<12:44:38, 588.19s/it]

Epoch: 72   Loss: 88.2503890991211


 49%|████████████████████████████████████                                      | 73/150 [12:07:57<12:35:53, 589.00s/it]

Epoch: 73   Loss: 88.21111297607422


 49%|████████████████████████████████████▌                                     | 74/150 [12:17:46<12:26:12, 589.11s/it]

Epoch: 74   Loss: 89.57752990722656


 50%|█████████████████████████████████████                                     | 75/150 [12:27:37<12:17:02, 589.63s/it]

Epoch: 75   Loss: 87.11522674560547


 51%|█████████████████████████████████████▍                                    | 76/150 [12:37:28<12:07:33, 589.91s/it]

Epoch: 76   Loss: 84.998291015625


 51%|█████████████████████████████████████▉                                    | 77/150 [12:47:17<11:57:25, 589.66s/it]

Epoch: 77   Loss: 90.22279357910156


 52%|██████████████████████████████████████▍                                   | 78/150 [12:57:07<11:47:53, 589.91s/it]

Epoch: 78   Loss: 89.26412963867188


 53%|██████████████████████████████████████▉                                   | 79/150 [13:06:58<11:38:22, 590.17s/it]

Epoch: 79   Loss: 94.3436508178711


 53%|███████████████████████████████████████▍                                  | 80/150 [13:16:51<11:29:22, 590.90s/it]

Epoch: 80   Loss: 90.99979400634766


 54%|███████████████████████████████████████▉                                  | 81/150 [13:26:48<11:21:46, 592.85s/it]

Epoch: 81   Loss: 94.06163787841797


 55%|████████████████████████████████████████▍                                 | 82/150 [13:36:59<11:18:06, 598.34s/it]

Epoch: 82   Loss: 98.6934814453125


 55%|████████████████████████████████████████▉                                 | 83/150 [13:47:09<11:11:47, 601.60s/it]

Epoch: 83   Loss: 96.24359893798828


 55%|████████████████████████████████████████▉                                 | 83/150 [13:54:12<11:13:23, 603.04s/it]


KeyboardInterrupt: 

In [20]:
def predict_rnn(rnn_model, hidden_size, num_layers, word2idx, idx2word, context, max_len):
    """
    This function predicts the next word, based on the history of the previous words.
    We start with the 'context' and then feed the prediction as the next input.
    
    Arguments
    ---------
    rnn_model - object
                an RNN object that contains the trained model
                
    hidden_size - integer
                    the size of the hidden layer (the context size)
                    
    num_layers - integer
                    the number of layers in the GRU cell
                
    word2idx - dictionary
                    a dictionary where the keys are the unique words in the data
                    and the values are the unique indices corresponding to the words
                    
    idx2word - dictionary
                a dictionary, where the keys are the indices and the values are the words
                    
    context - string
                the context sentence
    
    max_len - integer
                integer value representing up to how many words to generate
                            
    Returns
    -------
    
    predictions - string
                    a string containing the generated sentence
    """
    
    # index the context
    context_indexed = []
    for word in context.split():
        word_indexed = torch.LongTensor(1)
        word_indexed[:] = word2idx[word]
        context_indexed.append(word_indexed)
    
    with torch.no_grad():
        predictions = []
        # first build the hidden state from the context
        hidden = torch.zeros((num_layers, 1, hidden_size), device=device)
        for word in context_indexed:
            predictions.append(idx2word[word.item()])
            word = word.to(device)
            output, hidden = rnn_model(word, hidden)
            
        next_input = context_indexed[-1]
        while((len(predictions) < max_len) and (predictions[-1] != '</s>')):
            
            # YOUR CODE HERE
            #raise NotImplementedError()
            #1. Run the forward pass to get the output. Don't forget to include the `hidden` state
            #print("INPUT", next_input)
            next_input = next_input.to(device)
            out, hidden = rnn_model.forward(next_input, hidden)
            
            #2. Run the output through a softmax to convert it to a probability distribution (`F.softmax`)
            out = F.softmax(out)
            
            #3. Get the word with the highest probability using the `topk(1)` function
            value, index = out.topk(1)
            
            #4. Convert the index of the predicted word to the actual word using the idx2word dictionary
            word = idx2word[index.item()]
            
            #5. Append the predicted word to the `predictions` array
            predictions.append(word)
            next_input = index
            
    predictions = ' '.join(predictions)
    
    return predictions

In [27]:
contexts = ['<s> to the moon', '<s> GME', '<s> my wife', '<s> Elon Musk', '<s> the best stock is', '<s> I think the market will']
max_len = 50

for context in contexts:
    predictions = predict_rnn(rnn_model, hidden_size, num_layers, word2idx, idx2word, context, max_len)
    print(predictions)
    print('\n')

<s> to the moon at the game downside group might be profitable for the retail else sales in limiting the market is still from the future soon. </s>


<s> GME Fuzzy numbers calls and says try to buy corporate bond that bond sectors is enough to decision that he needs help. </s>


<s> my wife does not prevent that well, not. </s>


<s> Elon Musk Has Made Millionaires train Chinese train and says it says to bear market open. </s>


<s> the best stock is extremely unlikely, is not huge and closed a huge blow for the market. </s>


<s> I think the market will pump the fed from the future wave as well as well as invested as the as the of the public gain as indicators of control on the market. </s>






In [19]:
#torch.save(rnn_model.state_dict(), "rnn_epoch_83_lr0001.pt")