In [None]:
!nvidia-smi

Mon Nov  8 09:45:10 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.44       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   68C    P8    31W / 149W |      0MiB / 11441MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
import torch
from torch import nn
import pandas as pd
import numpy as np


In [None]:


# Load the training sentences from the "text" column of the CSV file.
d = pd.read_csv("./dataset.csv", index_col=False)

text = d["text"].tolist()

# Vocabulary: every distinct character of the corpus, plus the space that the
# padding cell appends to short sentences, so padded text can always be encoded.
# Sorting pins the id of each character; iterating a bare set of strings may
# yield a different order (and therefore different ids) after a kernel restart.
chars = sorted(set(''.join(text)) | {' '})

# Two lookup tables: integer id -> character and character -> integer id.
int2char = dict(enumerate(chars))
char2int = {char: ind for ind, char in int2char.items()}

In [None]:
# Show the character -> integer id table so the encoding can be inspected.
print(char2int)

{'r': 0, 'l': 1, 'd': 2, 'u': 3, 'W': 4, 'h': 5, 't': 6, 'k': 7, 'E': 8, 'v': 9, 'n': 10, 'f': 11, 'y': 12, 'o': 13, 'g': 14, ' ': 15, 'i': 16, 's': 17, 'C': 18, 'e': 19, 'a': 20, 'G': 21, 'T': 22, 'c': 23, 'm': 24, 'w': 25}


In [None]:
# Length of the longest sentence; shorter sentences are padded up to it below.
maxlen = max(len(sentence) for sentence in text)
print("The longest string has {} characters".format(maxlen))


The longest string has 33 characters


In [None]:
# Padding: right-fill every sentence with spaces until it is maxlen characters long.
for i, sentence in enumerate(text):
    text[i] = sentence.ljust(maxlen)

In [None]:
# Build next-character training pairs: the input is a sentence without its
# last character, the target is the same sentence without its first character.
input_seq = [sentence[:-1] for sentence in text]
target_seq = [sentence[1:] for sentence in text]

for source, target in zip(input_seq, target_seq):
    print("Input Sequence: {}\nTarget Sequence: {}".format(source, target))

Input Sequence: We drink coffee every morning   
Target Sequence: e drink coffee every morning    
Input Sequence:  how are you                    
Target Sequence: how are you                     
Input Sequence: They do not go to school tomorro
Target Sequence: hey do not go to school tomorrow
Input Sequence: Good night                      
Target Sequence: ood night                       
Input Sequence: Cats hate water                 
Target Sequence: ats hate water                  
Input Sequence: The train every morning         
Target Sequence: he train every morning          
Input Sequence: Every child likes an ice cream  
Target Sequence: very child likes an ice cream   


In [None]:
#1a
# Replace every character of every sequence with its integer id from char2int.
input_seq = [[char2int[character] for character in sentence] for sentence in input_seq]
target_seq = [[char2int[character] for character in sentence] for sentence in target_seq]

In [None]:
# Number of distinct characters, i.e. the width of one one-hot vector.
dict_size = len(char2int)
# Input and target sequences are one character shorter than the padded text.
seq_len = maxlen - 1
# One sequence per sentence in `text`.
batch_size = len(text)

def one_hot_encode(sequence, dict_size, seq_len, batch_size):
    """Turn integer-coded sequences into a one-hot float32 array.

    Args:
        sequence: Indexable collection of at least ``batch_size`` rows, each
            holding at least ``seq_len`` integer ids usable as an index into
            the last axis of the result.
        dict_size: Length of the last (one-hot) axis.
        seq_len: Number of leading positions of each row to encode.
        batch_size: Number of leading rows of ``sequence`` to encode.

    Returns:
        ``np.ndarray`` of shape ``(batch_size, seq_len, dict_size)`` and dtype
        ``float32`` with a single 1 per encoded position and 0 elsewhere.
    """
    encoded = np.zeros((batch_size, seq_len, dict_size), dtype=np.float32)

    # Visit every (row, position) pair in row-major order and switch on the
    # slot that corresponds to the id stored at that position.
    for row, step in np.ndindex(batch_size, seq_len):
        encoded[row, step, sequence[row][step]] = 1

    return encoded

In [None]:
                                                 #*****
# One-hot encode the integer-coded inputs. This rebinds input_seq from nested
# lists of ids to a float32 array, so re-running this cell on its own would
# feed the already-encoded array back into one_hot_encode.
input_seq = one_hot_encode(input_seq, dict_size, seq_len   , batch_size)
# Sanity check on the shape of the encoded batch.
print("Input shape: {} --> (Batch Size, Sequence Length, One-Hot Encoding Size)".format(input_seq.shape))

Input shape: (7, 32, 26) --> (Batch Size, Sequence Length, One-Hot Encoding Size)


In [None]:

# Pick the compute device: the GPU when CUDA is usable, otherwise the CPU.
is_cuda = torch.cuda.is_available()
device = torch.device("cuda" if is_cuda else "cpu")
print("GPU is available" if is_cuda else "GPU not available, CPU used")

GPU is available


In [None]:
class Model(nn.Module):
    """Character-level network: three chained ``nn.RNN`` blocks and a linear head.

    Args:
        input_size: Size of each input feature vector (the one-hot width).
        output_size: Number of scores produced for every time step.
        hidden_dim: Hidden-state size shared by all three RNN blocks.
        n_layers: Number of recurrent layers inside each RNN block.
    """

    def __init__(self, input_size, output_size, hidden_dim, n_layers):
        super().__init__()

        # Kept on the instance because init_hidden() and forward() need them.
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        # Recurrent blocks; batch_first=True means inputs are (batch, seq, feature).
        self.rnn = nn.RNN(input_size, hidden_dim, n_layers, batch_first=True)
        self.rnn1 = nn.RNN(hidden_dim, hidden_dim, n_layers, batch_first=True)
        self.rnn2 = nn.RNN(hidden_dim, hidden_dim, n_layers, batch_first=True)

        # Maps each hidden vector to one score per output class.
        self.fc = nn.Linear(hidden_dim, output_size)

    def forward(self, x):
        """Run a batch of sequences through the network.

        Args:
            x: Tensor of shape ``(batch, seq_len, input_size)``.

        Returns:
            Tuple ``(out, hidden)``: ``out`` has shape
            ``(batch * seq_len, output_size)``; ``hidden`` is the final hidden
            state of the last RNN block applied, shape
            ``(n_layers, batch, hidden_dim)``.
        """
        batch_size = x.size(0)

        # Every forward pass starts from an all-zero hidden state.
        hidden = self.init_hidden(batch_size)

        # Each block receives the previous block's outputs and final hidden
        # state. Note the order: rnn2 runs before rnn1.
        out, hidden = self.rnn(x, hidden)
        out, hidden = self.rnn2(out, hidden)
        out, hidden = self.rnn1(out, hidden)

        # Flatten (batch, seq, hidden) to (batch * seq, hidden) so the linear
        # layer scores every time step independently.
        out = out.contiguous().view(-1, self.hidden_dim)
        out = self.fc(out)

        return out, hidden

    def init_hidden(self, batch_size):
        """Return a zero hidden state of shape ``(n_layers, batch_size, hidden_dim)``.

        The tensor is created with the dtype and on the device of the model's
        own parameters, so it stays valid after ``model.to(...)`` or
        ``model.double()`` and does not rely on any notebook-level variable.
        """
        reference = next(self.parameters())
        hidden = torch.zeros(
            self.n_layers,
            batch_size,
            self.hidden_dim,
            dtype=reference.dtype,
            device=reference.device,
        )

        return hidden

In [None]:
# Hyperparameters
n_epochs = 400
lr = 0.01

# Build the network (input and output are both one-hot wide) and place it on
# the selected device in one step.
model = Model(
    input_size=dict_size,
    output_size=dict_size,
    hidden_dim=12,
    n_layers=1,
).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [None]:
# Convert the training data to tensors. torch.as_tensor reuses the NumPy
# array's memory just like torch.from_numpy, but it also accepts a tensor, so
# re-running this cell does not raise.
input_seq = torch.as_tensor(input_seq)
# Targets are class indices: store them as int64 (what CrossEntropyLoss
# consumes) instead of float32 values that need casting later.
target_seq = torch.as_tensor(target_seq, dtype=torch.long)

In [None]:
# Training Run
# Move the data to the training device once; neither tensor changes between
# epochs, so there is no reason to repeat this inside the loop.
input_seq = input_seq.to(device)
target_seq = target_seq.to(device)
# The model emits one row of scores per (sentence, position) pair, so the
# targets are flattened to a matching vector of int64 class indices.
targets = target_seq.view(-1).long()

# sample() leaves the model in eval mode; switch back so this cell can be
# re-run after sampling (the cuDNN RNN backward pass requires training mode).
model.train()

for epoch in range(1, n_epochs + 1):
    optimizer.zero_grad()  # clear gradients left over from the previous epoch
    output, hidden = model(input_seq)  # output already lives on the model's device
    loss = criterion(output, targets)
    loss.backward()
    optimizer.step()

    # Report progress every tenth epoch.
    if epoch % 10 == 0:
        print('Epoch: {}/{}....'.format(epoch, n_epochs), end=' ')
        print("Loss: {:.4f}".format(loss.item()))

Epoch: 10/400.... Loss: 2.3584
Epoch: 20/400.... Loss: 2.2342
Epoch: 30/400.... Loss: 2.1715
Epoch: 40/400.... Loss: 2.0661
Epoch: 50/400.... Loss: 1.8646
Epoch: 60/400.... Loss: 1.6490
Epoch: 70/400.... Loss: 1.4699
Epoch: 80/400.... Loss: 1.3203
Epoch: 90/400.... Loss: 1.1740
Epoch: 100/400.... Loss: 1.0559
Epoch: 110/400.... Loss: 0.9331
Epoch: 120/400.... Loss: 0.8206
Epoch: 130/400.... Loss: 0.7312
Epoch: 140/400.... Loss: 0.6525
Epoch: 150/400.... Loss: 0.5834
Epoch: 160/400.... Loss: 0.5122
Epoch: 170/400.... Loss: 0.4478
Epoch: 180/400.... Loss: 0.3952
Epoch: 190/400.... Loss: 0.3514
Epoch: 200/400.... Loss: 0.3137
Epoch: 210/400.... Loss: 0.2802
Epoch: 220/400.... Loss: 0.2512
Epoch: 230/400.... Loss: 0.2256
Epoch: 240/400.... Loss: 0.2029
Epoch: 250/400.... Loss: 0.1839
Epoch: 260/400.... Loss: 0.1738
Epoch: 270/400.... Loss: 0.2177
Epoch: 280/400.... Loss: 0.1723
Epoch: 290/400.... Loss: 0.1406
Epoch: 300/400.... Loss: 0.1223
Epoch: 310/400.... Loss: 0.1096
Epoch: 320/400...

In [None]:
def predict(model, character):
    """Predict the character that follows a sequence of characters.

    Args:
        model: Network called on a ``(1, len(character), dict_size)`` one-hot
            tensor; it must return ``(scores, hidden)`` with one row of scores
            per time step.
        character: Non-empty string or list of characters, each of which must
            be a key of ``char2int``.

    Returns:
        Tuple ``(next_char, hidden)``: the highest-scoring character after the
        last input position, and the hidden state returned by the model.

    Raises:
        ValueError: If ``character`` is empty.
        KeyError: If a character is not in ``char2int``.
    """
    if len(character) == 0:
        raise ValueError("predict() needs at least one input character")

    # Encode the characters as a single-row batch of one-hot vectors.
    indices = np.array([[char2int[c] for c in character]])
    encoded = one_hot_encode(indices, dict_size, indices.shape[1], 1)
    inputs = torch.from_numpy(encoded).to(device)

    # Inference only: skip autograd bookkeeping instead of detaching afterwards.
    with torch.no_grad():
        out, hidden = model(inputs)

        # The last row of `out` holds the scores for the final time step.
        prob = nn.functional.softmax(out[-1], dim=0)
        char_ind = torch.max(prob, dim=0)[1].item()

    return int2char[char_ind], hidden


In [None]:
def sample(model, out_len, start='hey'):
    """Extend a prompt one predicted character at a time.

    Args:
        model: Network passed through to ``predict``; it is put in eval mode.
        out_len: Total length of the returned string. If the prompt is already
            this long or longer, it is returned unchanged (lower-cased).
        start: Prompt text; it is lower-cased before use.
            NOTE(review): the vocabulary printed earlier in this notebook
            contains upper-case letters, which a lower-cased prompt can never
            use. Confirm this is intended.

    Returns:
        The lower-cased prompt followed by the predicted characters.
    """
    model.eval()

    chars = list(start.lower())

    # Feed the whole running text back in for every new character; the hidden
    # state returned by predict() is not reused.
    while len(chars) < out_len:
        next_char, _ = predict(model, chars)
        chars.append(next_char)

    return ''.join(chars)

In [None]:
# Generate 10 characters in total, starting from the prompt 'drink'.
sample(model, 10, 'drink')

'drink coff'