In [1]:
import torch
import torch.nn as nn
from utils import TextProcess, TextGenerator

In [2]:
embed_size = 128    #Input features to the LSTM
hidden_size = 1024  #Number of LSTM units
num_layers = 1
num_epochs = 20
batch_size = 20
timesteps = 30
learning_rate = 0.002

In [3]:
corpus = TextProcess()

In [4]:
rep_tensor = corpus.get_data('alice.txt', batch_size=batch_size)

In [5]:
vocab_size = len(corpus.dictionary)
num_batches = rep_tensor.shape[1]//timesteps

In [6]:
rep_tensor.shape

torch.Size([20, 1652])

In [7]:
embed_size

128

In [8]:
timesteps

30

In [9]:
# Get model
model = TextGenerator(vocab_size, embed_size, hidden_size, num_layers)

In [None]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [49]:
# Print model's state_dict
print("Model's state_dict:")
for param_tensor in model.state_dict():
    print(param_tensor, "\t", model.state_dict()[param_tensor].size())

# Print optimizer's state_dict
print("Optimizer's state_dict:")
for var_name in optimizer.state_dict():
    print(var_name, "\t", optimizer.state_dict()[var_name])

Model's state_dict:
embed.weight 	 torch.Size([5995, 128])
lstm.weight_ih_l0 	 torch.Size([4096, 128])
lstm.weight_hh_l0 	 torch.Size([4096, 1024])
lstm.bias_ih_l0 	 torch.Size([4096])
lstm.bias_hh_l0 	 torch.Size([4096])
linear.weight 	 torch.Size([5995, 1024])
linear.bias 	 torch.Size([5995])
Optimizer's state_dict:
state 	 {}
param_groups 	 [{'lr': 0.002, 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False, 'params': [4560272192, 4560272048, 4560272336, 4560272408, 4560272480, 4560272552, 4560272624]}]


In [50]:
import time

start = time.time()
print("Start training the model")
for epoch in range(num_epochs):
    # Set zeros to be the initial hidden and cell states
    states = (torch.zeros(num_layers, batch_size, hidden_size),
             torch.zeros(num_layers, batch_size, hidden_size))
    for i in range(0, rep_tensor.size(1) - timesteps, timesteps):
        # Get mini-batch inputs and targets
        inputs = rep_tensor[:, i:i+timesteps]
        targets = rep_tensor[:, (i+1):(i+1)+timesteps]
        # 600 = 20 * 30 = batch_size * timesteps
        outputs, _ = model(inputs, states) # outputs is 600x1
        loss = loss_fn(outputs, targets.reshape(-1)) # reshape to be 600
        
        # Backpropagation and Weight Update
        model.zero_grad()
        loss.backward()
        
        # Add gradient clipping to prevent `Gradient Vanishing`
        nn.utils.clip_grad_norm(model.parameters(), 0.5)
        optimizer.step()
        
        step = (i+1) // timesteps
        if step % 100 == 0:
            print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item()}")
end = time.time()
print(f"{end - start}: seconds")
torch.save(model.state_dict(), 'lstm.pt')

Start training the model




Epoch [1/20], Loss: 8.70119857788086
Epoch [2/20], Loss: 6.043236255645752
Epoch [3/20], Loss: 5.209585666656494
Epoch [4/20], Loss: 4.527743339538574
Epoch [5/20], Loss: 3.9210705757141113
Epoch [6/20], Loss: 3.469094753265381
Epoch [7/20], Loss: 3.019498586654663
Epoch [8/20], Loss: 2.5557782649993896
Epoch [9/20], Loss: 2.1801047325134277
Epoch [10/20], Loss: 1.7460548877716064
Epoch [11/20], Loss: 1.3886160850524902
Epoch [12/20], Loss: 1.1579561233520508
Epoch [13/20], Loss: 0.946799635887146
Epoch [14/20], Loss: 0.6713408827781677
Epoch [15/20], Loss: 0.4528830945491791
Epoch [16/20], Loss: 0.30095961689949036
Epoch [17/20], Loss: 0.1882040947675705
Epoch [18/20], Loss: 0.12131280452013016
Epoch [19/20], Loss: 0.08757255971431732
Epoch [20/20], Loss: 0.07487738877534866
1154.1750988960266: seconds


In [96]:
# Load the trained model from file
model = TextGenerator(vocab_size, embed_size, hidden_size, num_layers)
model.load_state_dict(torch.load('lstm.pt'))
model.eval()

TextGenerator(
  (embed): Embedding(5995, 128)
  (lstm): LSTM(128, 1024, batch_first=True)
  (linear): Linear(in_features=1024, out_features=5995, bias=True)
)

In [97]:
# Print model's state_dict
print("Model's state_dict:")
for param_tensor in model.state_dict():
    print(param_tensor, "\t", model.state_dict()[param_tensor].size())

# Print optimizer's state_dict
print("Optimizer's state_dict:")
for var_name in optimizer.state_dict():
    print(var_name, "\t", optimizer.state_dict()[var_name])

Model's state_dict:
embed.weight 	 torch.Size([5995, 128])
lstm.weight_ih_l0 	 torch.Size([4096, 128])
lstm.weight_hh_l0 	 torch.Size([4096, 1024])
lstm.bias_ih_l0 	 torch.Size([4096])
lstm.bias_hh_l0 	 torch.Size([4096])
linear.weight 	 torch.Size([5995, 1024])
linear.bias 	 torch.Size([5995])
Optimizer's state_dict:
state 	 {4560272192: {'step': 1100, 'exp_avg': tensor([[ 0.0000, -0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000, -0.0000,  0.0000,  ..., -0.0000, -0.0000,  0.0000],
        [ 0.0000,  0.0000, -0.0000,  ...,  0.0000,  0.0000,  0.0000],
        ...,
        [ 0.0000,  0.0000, -0.0000,  ..., -0.0000,  0.0000, -0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]]), 'exp_avg_sq': tensor([[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0

In [98]:
vocab_size

5995

In [104]:
# Test the model
filename = 'results.txt'
with torch.no_grad():
    with open(filename, 'w') as f:
        # Set the initial hidden one cell states
        # batch_size = 1 Because we feed only 1 vector
        state = (torch.zeros(num_layers, 1, hidden_size),
                 torch.zeros(num_layers, 1, hidden_size))
        # Select one word id randomly and convert it to shape (1,1)

        # I need to compare Fawaz version and my version. Then let it be 
        # reproducible program
        # input = torch.tensor([[9]])
        input = torch.randint(0, vocab_size, (1,)).long().unsqueeze(1)
        print(input)
        print(f"Pick word: {corpus.dictionary.idx2word[input.item()]}")

        for i in range(500):
            output, _ = model(input, state)

            prob = output.exp()
            word_id = torch.multinomial(prob, num_samples=1).item()

            input.fill_(word_id)
            print(f"Predicted word is: {corpus.dictionary.idx2word[input.item()]}\t\t\t{word_id}. ")
            
            # Write the results to file
            word = corpus.dictionary.idx2word[word_id]
            word = '\n' if word == '<eos>' else word + ' '
            f.write(word)
            
            if (i+1) % 100 == 0:
                print(f"Sample [{i+1}/{500}] and save to {filename}")

tensor([[5976]])
Pick word: editions,
Predicted word is: delay			4158. 
Predicted word is: the			3. 
Predicted word is: fight			3969. 
Predicted word is: her			16. 
Predicted word is: head			550. 
Predicted word is: off			252. 
Predicted word is: a			44. 
Predicted word is: queer			970. 
Predicted word is: pig,’			3207. 
Predicted word is: the			3. 
Predicted word is: King			3780. 
Predicted word is: <eos>			5. 
Predicted word is: <eos>			5. 
Predicted word is: The			165. 
Predicted word is: waistcoat-pocket,			142. 
Predicted word is: as			55. 
Predicted word is: it			34. 
Predicted word is: had			28. 
Predicted word is: not			173. 
Predicted word is: help			946. 
Predicted word is: and			20. 
Predicted word is: was			7. 
Predicted word is: as			55. 
Predicted word is: she			27. 
Predicted word is: went			161. 
Predicted word is: on			18. 
Predicted word is: a			44. 
Predicted word is: sort			290. 
Predicted word is: of			13. 
Predicted word is: the			3. 
Predicted word is: roof			888

Predicted word is: jury-box			4959. 
Predicted word is: with			80. 
Predicted word is: you,			645. 
Predicted word is: and			20. 
Predicted word is: there			208. 
Predicted word is: <eos>			5. 
Predicted word is: <eos>			5. 
Predicted word is: <eos>			5. 
Predicted word is: ‘Then			2051. 
Predicted word is: the			3. 
Predicted word is: moral			4040. 
Predicted word is: of			13. 
Predicted word is: his			1055. 
Predicted word is: first			505. 
Predicted word is: to			9. 
Predicted word is: look			186. 
Predicted word is: up			73. 
Predicted word is: into			30. 
Predicted word is: a			44. 
Predicted word is: Gryphon,			4178. 
Predicted word is: and,			964. 
Predicted word is: as			55. 
Predicted word is: he			931. 
Predicted word is: spoke,			2632. 
Predicted word is: and			20. 
Predicted word is: <eos>			5. 
Predicted word is: about			174. 
Predicted word is: the			3. 
Predicted word is: Gryphon.			4189. 
Predicted word is: <eos>			5. 
Predicted word is: The			165. 
Predicted word is: M

In [102]:
type(output)

torch.Tensor

In [44]:
max_value = torch.max(output)
print(max_value)

tensor(0.1361)


In [45]:
output

tensor([[ 0.0401,  0.0060, -0.0380,  ..., -0.0294, -0.0329,  0.0561]])

In [26]:
idx = (output == max_value).nonzero()
print(idx)

tensor([[   0, 1898]])


In [36]:
idx[0][0].item()
idx[0][1].item()

1898

In [37]:
corpus.dictionary.idx2word[idx[0][1].item()]

'plate'

In [41]:
print(torch.__version__)

0.4.1


In [43]:
input = torch.randint(0, vocab_size, (1,)).long().unsqueeze(1)
print(input)

tensor([[5026]])


In [18]:
for i in [ 0,  1,  2,  3,  4,  5,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
        17, 18,  3,  5, 19, 20, 13, 21, 22,  9, 23, 24, 25, 26, 27, 28, 29, 30,
         3,  5, 31, 16, 17,  7, 32, 33, 34, 28, 35, 36, 25, 37]:
    print(corpus.dictionary.idx2word[i])

CHAPTER
I.
Down
the
Rabbit-Hole
<eos>
<eos>
Alice
was
beginning
to
get
very
tired
of
sitting
by
her
sister
on
the
<eos>
bank,
and
of
having
nothing
to
do:
once
or
twice
she
had
peeped
into
the
<eos>
book
her
sister
was
reading,
but
it
had
no
pictures
or
conversations


In [22]:
for i in range(0, rep_tensor.size(1) - timesteps, timesteps):
    print(i)

0
30
60
90
120
150
180
210
240
270
300
330
360
390
420
450
480
510
540
570
600
630
660
690
720
750
780
810
840
870
900
930
960
990
1020
1050
1080
1110
1140
1170
1200
1230
1260
1290
1320
1350
1380
1410
1440
1470
1500
1530
1560
1590
1620
