In [32]:
import torch
import torch.nn as nn
from utils import TextProcess, TextGenerator

In [33]:
# --- Model hyperparameters ---
embed_size = 128     # number of input features fed to the LSTM
hidden_size = 1024   # number of LSTM units
num_layers = 1

# --- Training hyperparameters ---
num_epochs = 20
batch_size = 20      # rows requested from the corpus loader and used to size the LSTM states
timesteps = 30       # width of each training window taken along dimension 1 of the data
learning_rate = 0.002

# Fix the RNG so weight initialisation and word sampling repeat across kernel restarts
seed = 42
torch.manual_seed(seed);

In [34]:
# Text-processing helper imported from utils; later cells use it to load the
# text file and to look words up through corpus.dictionary
corpus = TextProcess()

In [35]:
# Load alice.txt through the corpus helper; the returned tensor has batch_size rows
# (presumably one token id per entry -- confirm against TextProcess.get_data)
rep_tensor = corpus.get_data('alice.txt', batch_size=batch_size)

In [36]:
# How many full `timesteps`-wide windows fit along dimension 1 of the data
num_batches = rep_tensor.size(1) // timesteps
# Number of entries in the corpus dictionary
vocab_size = len(corpus.dictionary)

In [37]:
# Inspect the dimensions of the loaded data tensor
rep_tensor.shape

torch.Size([20, 1652])

In [38]:
# Sanity check: display the configured embed_size
embed_size

128

In [39]:
# Sanity check: display the configured timesteps
timesteps

30

In [40]:
# Build the text-generation model (defined in utils) from the vocabulary size
# and the hyperparameters configured above
model = TextGenerator(vocab_size, embed_size, hidden_size, num_layers)

In [41]:
# Adam optimises every parameter of the model at the configured learning rate
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
# Cross-entropy is the training criterion applied to the model outputs
loss_fn = nn.CrossEntropyLoss()

In [42]:
import time

# Train for num_epochs passes over the data, one `timesteps`-wide window at a time,
# and report the wall-clock duration at the end.
start = time.perf_counter()  # monotonic clock: safe for measuring elapsed time
print("Start training the model")
for epoch in range(num_epochs):
    # Zero-initialised hidden and cell states, each (num_layers, batch_size, hidden_size).
    # NOTE(review): the states returned by the model are discarded below, so every
    # window is fed these zeros; presumably they were meant to be detached and carried
    # over between windows -- confirm against TextGenerator before changing.
    states = (torch.zeros(num_layers, batch_size, hidden_size),
              torch.zeros(num_layers, batch_size, hidden_size))
    for i in range(0, rep_tensor.size(1) - timesteps, timesteps):
        # Inputs are one window of the data; targets are the same window shifted right by one
        inputs = rep_tensor[:, i:i+timesteps]
        targets = rep_tensor[:, (i+1):(i+1)+timesteps]
        # CrossEntropyLoss pairs one row of class scores with each flattened target, so
        # outputs must be (batch_size * timesteps, num_classes), i.e. 600 rows here
        outputs, _ = model(inputs, states)
        loss = loss_fn(outputs, targets.reshape(-1))  # targets flattened to 600 entries

        # Backpropagation and weight update
        model.zero_grad()
        loss.backward()

        # Clip the gradient norm to guard against exploding gradients
        nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        optimizer.step()

        # Index of the current window within the epoch; only multiples of 100 are logged
        step = i // timesteps
        if step % 100 == 0:
            print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item()}")
end = time.perf_counter()
print(f"{end - start}: seconds")

Start training the model




Epoch [1/20], Loss: 8.69544792175293
Epoch [2/20], Loss: 6.0996551513671875
Epoch [3/20], Loss: 5.2865471839904785
Epoch [4/20], Loss: 4.697013854980469
Epoch [5/20], Loss: 4.160508632659912
Epoch [6/20], Loss: 3.556269884109497
Epoch [7/20], Loss: 3.146498918533325
Epoch [8/20], Loss: 2.6888229846954346
Epoch [9/20], Loss: 2.2694056034088135
Epoch [10/20], Loss: 1.9699627161026
Epoch [11/20], Loss: 1.6580067873001099
Epoch [12/20], Loss: 1.3515217304229736
Epoch [13/20], Loss: 1.102588415145874
Epoch [14/20], Loss: 0.875633955001831
Epoch [15/20], Loss: 0.6022496819496155
Epoch [16/20], Loss: 0.3885173499584198
Epoch [17/20], Loss: 0.2664426267147064
Epoch [18/20], Loss: 0.18090182542800903
Epoch [19/20], Loss: 0.1217212900519371
Epoch [20/20], Loss: 0.08298559486865997
1221.7026200294495: seconds


In [67]:
# Test the model: sample a single next word after a randomly chosen starting word.
# NOTE(review): the model is not switched to eval mode here -- confirm that
# TextGenerator has no train-only layers (e.g. dropout) before relying on this output.
with torch.no_grad():
    # Zero initial hidden and cell states; batch size is 1 because a single word id is fed
    state = (torch.zeros(num_layers, 1, hidden_size),
             torch.zeros(num_layers, 1, hidden_size))
    # Draw one word id at random, already shaped (1, 1); randint returns int64 by default.
    # Named start_word rather than `input` so the builtin input() is not shadowed.
    start_word = torch.randint(0, vocab_size, (1, 1))

    print(f"Pick word: {corpus.dictionary.idx2word[start_word.item()]}")

    output, _ = model(start_word, state)
    print(output.shape)
    print(f"output is: \n{output}")

    # exp() turns the scores into positive weights; torch.multinomial accepts
    # unnormalised weights, so they do not need to sum to 1
    prob = output.exp()
    print(f"prob is: \n{prob}")
    word_id = torch.multinomial(prob, num_samples=1).item()
    print(word_id)
    print(corpus.dictionary.idx2word[word_id])
        
        

            

Pick word: kindly
torch.Size([1, 5995])
output is: 
tensor([[-6.1872, -3.9400, -3.7626,  ..., -4.0496, -7.5892, -7.2021]])
prob is: 
tensor([[2.0556e-03, 1.9447e-02, 2.3224e-02,  ..., 1.7430e-02, 5.0589e-04,
         7.4501e-04]])
5
<eos>


In [50]:
# Scratch check: build and display a one-element tensor
torch.tensor([1])

tensor([1])

In [18]:
# Decode a hard-coded run of 50 ids back into words, one word per line
sample_ids = [
    0, 1, 2, 3, 4, 5, 5, 6, 7, 8,
    9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
    3, 5, 19, 20, 13, 21, 22, 9, 23, 24,
    25, 26, 27, 28, 29, 30, 3, 5, 31, 16,
    17, 7, 32, 33, 34, 28, 35, 36, 25, 37,
]
for token_id in sample_ids:
    print(corpus.dictionary.idx2word[token_id])

CHAPTER
I.
Down
the
Rabbit-Hole
<eos>
<eos>
Alice
was
beginning
to
get
very
tired
of
sitting
by
her
sister
on
the
<eos>
bank,
and
of
having
nothing
to
do:
once
or
twice
she
had
peeped
into
the
<eos>
book
her
sister
was
reading,
but
it
had
no
pictures
or
conversations


In [22]:
# Show the start offset of every window the training loop iterates over
window_starts = range(0, rep_tensor.shape[1] - timesteps, timesteps)
for offset in window_starts:
    print(offset)

0
30
60
90
120
150
180
210
240
270
300
330
360
390
420
450
480
510
540
570
600
630
660
690
720
750
780
810
840
870
900
930
960
990
1020
1050
1080
1110
1140
1170
1200
1230
1260
1290
1320
1350
1380
1410
1440
1470
1500
1530
1560
1590
1620
