In [3]:
from google.colab import drive
# Mount Google Drive at /gdrive; the project directory entered in the next cell lives under /gdrive/MyDrive.
drive.mount('/gdrive')

Mounted at /gdrive


In [4]:
cd '../gdrive/MyDrive/nlp/from-scratch/nlp-from-scratch/code/rnnlm'

/gdrive/MyDrive/nlp/from-scratch/nlp-from-scratch/code/rnnlm


In [5]:
import sys
# The working directory is .../code/rnnlm (see the cd cell), so '../..' adds the directory two
# levels up to the import path; `common` and `data` are presumably located there — confirm layout.
sys.path.append('../..')
from common import config

# NOTE(review): this flag is set before the remaining `common.*` imports on purpose — presumably
# those modules read config.GPU at import time to pick cupy over numpy (the "GPU Mode (cupy)"
# banner appears in this cell's output). Do not reorder without checking common/.
config.GPU = True

from common.optimizer import SGD
from common.trainer import RnnlmTrainer
from common.util import eval_perplexity
from data import ptb
from better_rnnlm import BetterRnnlm


[92m------------------------------------------------------------[0m
                       [92mGPU Mode (cupy)[0m
[92m------------------------------------------------------------[0m



In [6]:
# Hyperparameters for the BetterRnnlm training run.
batch_size, time_size = 20, 35    # forwarded to trainer.fit as batch_size / time_size
wordvec_size = hidden_size = 650  # both are passed to the BetterRnnlm constructor
dropout = 0.5                     # fourth BetterRnnlm constructor argument
lr = 20.0                         # initial SGD learning rate; quartered when validation perplexity stalls
max_epoch = 40                    # number of one-epoch trainer.fit calls in the training loop
max_grad = 0.25                   # forwarded to trainer.fit as max_grad

In [7]:
# Load the three PTB splits; only the training split's vocabulary mappings are kept.
corpus, word_to_id, id_to_word = ptb.load_data('train')
corpus_val, _, _ = ptb.load_data('val')
corpus_test, _, _ = ptb.load_data('test')

vocab_size = len(word_to_id)

# Training pairs: the target sequence is the input sequence shifted by one position.
xs, ts = corpus[:-1], corpus[1:]

In [8]:
# Build the language model from the vocabulary size and the hyperparameters defined earlier.
model = BetterRnnlm(vocab_size, wordvec_size, hidden_size, dropout)
# SGD starts at `lr`; the training loop overwrites optimizer.lr whenever it decays the rate.
optimizer = SGD(lr)
# The trainer is driven through trainer.fit(...) in the training loop.
trainer = RnnlmTrainer(model, optimizer)

In [None]:
# Train one epoch at a time. After every epoch the validation perplexity is measured:
# an improvement saves the parameters, anything else quarters the learning rate.
best_ppl = float('inf')
for epoch in range(max_epoch):
    trainer.fit(
        xs, ts,
        max_epoch=1,
        batch_size=batch_size,
        time_size=time_size,
        max_grad=max_grad,
    )

    # Reset the model state before running validation.
    model.reset_state()
    ppl = eval_perplexity(model, corpus_val)
    print(f'perplexity : {ppl}')

    if ppl < best_ppl:
        # New best validation score: keep it and checkpoint the parameters.
        best_ppl = ppl
        model.save_params()
    else:
        # No improvement: decay the learning rate and push it into the optimizer.
        lr = lr / 4.0
        optimizer.lr = lr

    # Reset the model state again before the next training epoch.
    model.reset_state()
    print('-' * 50)


| current_epoch 1 |  iters 1 / 1327 | time 0.4454360008239746[s] | perplexity 10000.160734654946
| current_epoch 1 |  iters 21 / 1327 | time 7.921055793762207[s] | perplexity 3580.732310198206
| current_epoch 1 |  iters 41 / 1327 | time 15.324316263198853[s] | perplexity 1925.921718605458
| current_epoch 1 |  iters 61 / 1327 | time 22.75799822807312[s] | perplexity 1328.8682288304137
| current_epoch 1 |  iters 81 / 1327 | time 30.156270503997803[s] | perplexity 994.353808345002
| current_epoch 1 |  iters 101 / 1327 | time 37.58293032646179[s] | perplexity 832.0903739661532
| current_epoch 1 |  iters 121 / 1327 | time 44.964157581329346[s] | perplexity 848.3930189269851
| current_epoch 1 |  iters 141 / 1327 | time 52.35613226890564[s] | perplexity 715.0511963484628
| current_epoch 1 |  iters 161 / 1327 | time 59.763336420059204[s] | perplexity 684.7571220130403
| current_epoch 1 |  iters 181 / 1327 | time 67.16836428642273[s] | perplexity 689.0694820954
| current_epoch 1 |  iters 201 / 

In [9]:
# Load saved parameters into the model. No path is given, so the model's default location is
# used — presumably the file written by model.save_params() in the training loop; confirm.
model.load_params()

In [None]:
# Debug inspection of the model parameters after loading.
# NOTE(review): this displays the whole parameter collection; a summary would keep the notebook smaller.
model.params

In [11]:
# Fresh trainer wrapping the existing model and optimizer objects.
trainer = RnnlmTrainer(model, optimizer)

In [12]:
# Manually set the trainer's epoch counter before resuming training.
# 15 is presumably the number of epochs completed before the parameters were saved — TODO confirm.
trainer.current_epoch = 15

In [1]:
# Resume training from the parameters currently held by `model`.
#
# The validation perplexity of the model as it stands is used as the starting baseline, so an
# epoch that is worse than the restored checkpoint can no longer overwrite the saved parameters
# (starting again from float('inf') would always save after the first resumed epoch).
model.reset_state()
best_ppl = eval_perplexity(model, corpus_val)
print(f'baseline perplexity : {best_ppl}')

# Only run the epochs still missing up to max_epoch. trainer.current_epoch is set by hand before
# resuming; it is assumed to count the epochs already completed — confirm against RnnlmTrainer.
# Falls back to 0 (a full run) when the attribute is absent.
start_epoch = getattr(trainer, 'current_epoch', 0)
for epoch in range(start_epoch, max_epoch):
    trainer.fit(xs, ts, max_epoch=1, batch_size=batch_size, time_size=time_size, max_grad=max_grad)

    # Reset the model state before running validation.
    model.reset_state()
    ppl = eval_perplexity(model, corpus_val)
    print(f'perplexity : {ppl}')

    if best_ppl > ppl:
        # Improvement over the best score so far: checkpoint the parameters.
        best_ppl = ppl
        model.save_params()
    else:
        # No improvement: quarter the learning rate and push it into the optimizer.
        lr /= 4.0
        optimizer.lr = lr

    # Reset the model state again before the next training epoch.
    model.reset_state()
    print('-'*50)

NameError: ignored

In [11]:
# Load saved parameters into the model before the final evaluation. No path is given, so the
# model's default location is used — presumably the checkpoint from model.save_params(); confirm.
model.load_params()

In [13]:
# Debug inspection of the gradient buffers; the visible part of the recorded output shows
# float32 arrays filled with zeros.
model.grads

[array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]], dtype=float32),
 array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]], dtype=float32),
 array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]], dtype=float32),
 array([0., 0., 0., ..., 0., 0., 0.], dtype=float32),
 array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ...

In [14]:
# Final evaluation on the held-out test split.
# Reset the model state first, exactly as is done before every validation pass in the training
# loop, so the result does not depend on whatever forward passes ran earlier in the session.
model.reset_state()
ppl_test = eval_perplexity(model, corpus_test)
print(ppl_test)

evaluating perplexity ...
234 / 235
76.2854392816185
