## Import

In [4]:
# reload updated module
%load_ext autoreload
%autoreload 2

# make screen full width
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

from train import *
from pytorch_lightning.callbacks import EarlyStopping

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Model

In [5]:
model = BiLSTM_CRF_PL(
    n_vocab1 = 13907, n_vocab2 = 0, n_vocab3 = 0, n_label = 5,
    emb1_dim = 512, emb2_dim = 0, emb3_dim = 0, hid_dim = 1024,
    m_type = 'sy', data_name = 'sy_1', lr = 0.001, batch_size=100)

## Train

In [8]:
model.hparams.lr = 0.0005

In [9]:
trainer = pl.Trainer(
    max_epochs=3, 
    log_every_n_steps=1,
    flush_logs_every_n_steps=1,
    gpus=1, 
    weights_summary=None, 
    progress_bar_refresh_rate=10)
trainer.fit(model)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

1

In [None]:
trainer = pl.Trainer(
    max_epochs=1, 
    limit_train_batches=50, 
    limit_val_batches=20,
    log_every_n_steps=1,
    flush_logs_every_n_steps=5,
    gpus=1, 
    weights_summary=None, 
    progress_bar_refresh_rate=5)
trainer.fit(model)

## Train from Checkpoint

In [None]:
# resume from check point (must initiate the model first)
trainer = pl.Trainer(
    max_epochs=10, 
    limit_train_batches=10, 
    limit_val_batches=3,
    gpus=1, 
    weights_summary=None, 
    progress_bar_refresh_rate=5,
    resume_from_checkpoint='lightning_logs/version_109/checkpoints/epoch=9-step=99.ckpt')
trainer.fit(model)

## Load from Checkpoint

In [19]:
model = BiLSTM_CRF_PL.load_from_checkpoint(
    'lightning_logs/version_31/checkpoints/epoch=0-step=19.ckpt')
print(model.hparams)

"data_name":  sy_1
"hid_dim":   1024
"lr":           0.001
"m_type":    sy
"n_label":    5
"emb1_dim":    512
"n_vocab1": 13907
"emb2_dim":    0
"n_vocab2": 0
"emb3_dim":    0
"n_vocab3": 0


## Test

In [10]:
result = trainer.test(model)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'char f1': 0.9795994153201912,
 'char precision': 0.9750617361624487,
 'char recall': 0.98417952625897,
 'word f1': 0.9485007703551535,
 'word precision': 0.9441071456658855,
 'word recall': 0.9529354797739252}
--------------------------------------------------------------------------------


## Tools

### Debugging

In [None]:
# touch all the code to find bugs
trainer = pl.Trainer(
    max_epochs=20, 
    fast_dev_run=True, # here
    limit_train_batches=1, 
    limit_val_batches=1,
    gpus=1, 
    weights_summary=None)
trainer.fit(model)

In [None]:
# train small number of batchs
trainer = pl.Trainer(
    max_epochs=20, 
    limit_train_batches=10, # here
    limit_val_batches=1, # here
    log_every_n_steps=1,
    flush_logs_every_n_steps=1,
    gpus=1, 
    weights_summary=None)
trainer.fit(model)

In [None]:
# train only 10% of an epoch
trainer = pl.Trainer(
    max_epochs=3, 
    limit_train_batches=0.1, # here
    log_every_n_steps=1,
    flush_logs_every_n_steps=1,
    gpus=1, 
    weights_summary=None)
trainer.fit(model)

In [None]:
# for large batch, run validation every 25% of a training epoch
trainer = pl.Trainer(
    max_epochs=3, 
    val_check_interval=0.25, # here
    limit_val_batches=1,
    log_every_n_steps=1,
    gpus=1, 
    weights_summary=None)
trainer.fit(model)

In [None]:
# Profile your code to find speed/memory bottlenecks
pl.Trainer(profiler="simple")

### Find LR and BS

In [None]:
# search lr
lr_finder = trainer.tuner.lr_find(model)
dfig = lr_finder.plot(suggest=True)

In [14]:
# set
model.hparams.lr = 0.0001

In [26]:
# search batch_size
trainer = pl.Trainer(gpus=1)
tuner = trainer.tuner.scale_batch_size(model)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores


size of data: 134413 15088 2253


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


size of data: 134413 15088 2253


Batch size 2 succeeded, trying batch size 4
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


size of data: 134413 15088 2253


Batch size 4 succeeded, trying batch size 8
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


size of data: 134413 15088 2253


Batch size 8 succeeded, trying batch size 16
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


size of data: 134413 15088 2253


Batch size 16 succeeded, trying batch size 32
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


size of data: 134413 15088 2253


Batch size 32 succeeded, trying batch size 64
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


size of data: 134413 15088 2253


Batch size 64 succeeded, trying batch size 128
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


size of data: 134413 15088 2253


Batch size 128 succeeded, trying batch size 256
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


size of data: 134413 15088 2253


Batch size 256 succeeded, trying batch size 512
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


size of data: 134413 15088 2253


Batch size 512 succeeded, trying batch size 1024
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


size of data: 134413 15088 2253


Traceback (most recent call last):
  File "/home/jin/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 607, in run_train
    self.train_loop.run_training_epoch()
  File "/home/jin/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/training_loop.py", line 422, in run_training_epoch
    batch_output = self.run_training_batch(batch, batch_idx, dataloader_idx)
  File "/home/jin/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/training_loop.py", line 575, in run_training_batch
    self.optimizer_step(optimizer, opt_idx, batch_idx, train_step_and_backward_closure)
  File "/home/jin/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/training_loop.py", line 362, in optimizer_step
    model_ref.optimizer_step(
  File "/home/jin/anaconda3/lib/python3.8/site-packages/pytorch_lightning/core/lightning.py", line 1414, in optimizer_step
    optimizer.step(closure=optimizer_closure)
  File "/home/jin/anaconda3/lib/python3.8/site-

size of data: 134413 15088 2253


Traceback (most recent call last):
  File "/home/jin/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 607, in run_train
    self.train_loop.run_training_epoch()
  File "/home/jin/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/training_loop.py", line 422, in run_training_epoch
    batch_output = self.run_training_batch(batch, batch_idx, dataloader_idx)
  File "/home/jin/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/training_loop.py", line 575, in run_training_batch
    self.optimizer_step(optimizer, opt_idx, batch_idx, train_step_and_backward_closure)
  File "/home/jin/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/training_loop.py", line 362, in optimizer_step
    model_ref.optimizer_step(
  File "/home/jin/anaconda3/lib/python3.8/site-packages/pytorch_lightning/core/lightning.py", line 1414, in optimizer_step
    optimizer.step(closure=optimizer_closure)
  File "/home/jin/anaconda3/lib/python3.8/site-

size of data: 134413 15088 2253


Traceback (most recent call last):
  File "/home/jin/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 607, in run_train
    self.train_loop.run_training_epoch()
  File "/home/jin/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/training_loop.py", line 422, in run_training_epoch
    batch_output = self.run_training_batch(batch, batch_idx, dataloader_idx)
  File "/home/jin/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/training_loop.py", line 575, in run_training_batch
    self.optimizer_step(optimizer, opt_idx, batch_idx, train_step_and_backward_closure)
  File "/home/jin/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/training_loop.py", line 362, in optimizer_step
    model_ref.optimizer_step(
  File "/home/jin/anaconda3/lib/python3.8/site-packages/pytorch_lightning/core/lightning.py", line 1414, in optimizer_step
    optimizer.step(closure=optimizer_closure)
  File "/home/jin/anaconda3/lib/python3.8/site-

size of data: 134413 15088 2253


Traceback (most recent call last):
  File "/home/jin/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 607, in run_train
    self.train_loop.run_training_epoch()
  File "/home/jin/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/training_loop.py", line 422, in run_training_epoch
    batch_output = self.run_training_batch(batch, batch_idx, dataloader_idx)
  File "/home/jin/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/training_loop.py", line 575, in run_training_batch
    self.optimizer_step(optimizer, opt_idx, batch_idx, train_step_and_backward_closure)
  File "/home/jin/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/training_loop.py", line 362, in optimizer_step
    model_ref.optimizer_step(
  File "/home/jin/anaconda3/lib/python3.8/site-packages/pytorch_lightning/core/lightning.py", line 1414, in optimizer_step
    optimizer.step(closure=optimizer_closure)
  File "/home/jin/anaconda3/lib/python3.8/site-

In [27]:
# set
model.hparams.batch_size = 512

### Callbacks: EarlyStopping

In [29]:
early_stop_callback = EarlyStopping(
   monitor='val_acc',
   min_delta=0.00,
   patience=3,
   verbose=False,
   mode='max')

In [31]:
trainer = pl.Trainer(
    max_epochs=30, 
    limit_train_batches=5, 
    limit_val_batches=3,
    log_every_n_steps=1,
    flush_logs_every_n_steps=1,
    callbacks=[early_stop_callback], # here
    gpus=1, 
    weights_summary=None, 
    progress_bar_refresh_rate=1)
trainer.fit(model)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


size of data: 134413 15088 2253


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

1

### Callbacks: LearningRateMonitor

In [None]:
# basic
def configure_optimizers(self):
   optimizer = Adam(...)
   scheduler = LambdaLR(optimizer, ...)
   return [optimizer], [scheduler]

# when scheduler require lr monitor, use callback
lr_monitor = LearningRateMonitor(logging_interval='step')
trainer = Trainer(callbacks=[lr_monitor])

# The ReduceLROnPlateau scheduler requires a monitor
def configure_optimizers(self):
   return {
       'optimizer': Adam(...),
       'lr_scheduler': ReduceLROnPlateau(optimizer, ...),
       'monitor': 'metric_to_track'
   }

## Technique

### Freeze layers

In [65]:
model = BiLSTM_CRF_PL(n_vocab1 = 13907)

In [66]:
# check weight name of the model
for weight_name in model.state_dict():
    print(weight_name)

transitions
x1emb.weight
lstm.weight_ih_l0
lstm.weight_hh_l0
lstm.bias_ih_l0
lstm.bias_hh_l0
lstm.weight_ih_l0_reverse
lstm.weight_hh_l0_reverse
lstm.bias_ih_l0_reverse
lstm.bias_hh_l0_reverse
hidden2tag.weight
hidden2tag.bias


In [67]:
# freeze the embedding weight
model.x1emb.weight.requires_grad = False

In [68]:
# observe embeding weight before training
model.x1emb.weight[0][:5]

tensor([-0.9247,  0.1824, -1.3625,  3.0842,  1.3788])

In [70]:
# observe weight of other layer (to be changed)
model.lstm.weight_ih_l0[0][:5]

tensor([ 0.0516,  0.0441,  0.0779,  0.0776, -0.0820], grad_fn=<SliceBackward>)

In [74]:
# weight summary shows Non-trainable params
trainer = pl.Trainer(
    max_epochs=1, 
    limit_train_batches=10, 
    limit_val_batches=1,
    gpus=1)
trainer.fit(model)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name       | Type      | Params
-----------------------------------------
0 | x1emb      | Embedding | 890 K 
1 | lstm       | LSTM      | 66.6 K
2 | hidden2tag | Linear    | 645   
3 | dropout    | Dropout   | 0     
-----------------------------------------
67.2 K    Trainable params
890 K     Non-trainable params
957 K     Total params
3.829     Total estimated model params size (MB)


size of data: 134413 15088 2253


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

1

In [75]:
# weight is unchanged after trainning
model.x1emb.weight[0][:5]

tensor([-0.9247,  0.1824, -1.3625,  3.0842,  1.3788])

In [76]:
# weight of other layer changes
model.lstm.weight_ih_l0[0][:5]

tensor([ 0.0517,  0.0440,  0.0780,  0.0775, -0.0819], grad_fn=<SliceBackward>)

### Import weight from pretrained

In [77]:
# load pretrained model from checkpoint
pretrained_model = BiLSTM_CRF_PL.load_from_checkpoint(
    'lightning_logs/version_45/checkpoints/epoch=0-step=9.ckpt')

In [78]:
# look at pretrained embedding weight
pretrained_model.x1emb.weight[0][:5]

tensor([ 0.3177,  0.5379, -1.0707, -1.2163, -0.9013], grad_fn=<SliceBackward>)

In [87]:
# define new model
new_model = BiLSTM_CRF_PL(n_vocab1 = 13907)

In [88]:
# explore embedding weight (different)
new_model.x1emb.weight[0][:5]

tensor([-0.2876, -0.7774, -1.9447,  0.8273,  0.4592], grad_fn=<SliceBackward>)

In [89]:
# set the weight of new_model = the weight of pretrained_model
new_model.x1emb.weight.data.copy_ = pretrained_model.x1emb.weight

In [91]:
# embedding weight has changed
new_model.x1emb.weight[0][:5]

tensor([-0.2876, -0.7774, -1.9447,  0.8273,  0.4592], grad_fn=<SliceBackward>)

In [92]:
# train
trainer = pl.Trainer(
    max_epochs=1, 
    limit_train_batches=10, 
    limit_val_batches=1,
    gpus=1)
trainer.fit(new_model)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name       | Type      | Params
-----------------------------------------
0 | x1emb      | Embedding | 890 K 
1 | lstm       | LSTM      | 66.6 K
2 | hidden2tag | Linear    | 645   
3 | dropout    | Dropout   | 0     
-----------------------------------------
957 K     Trainable params
0         Non-trainable params
957 K     Total params
3.829     Total estimated model params size (MB)


size of data: 134413 15088 2253


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

1

In [93]:
# embedding weight has changed
new_model.x1emb.weight[0][:5]

tensor([-0.2877, -0.7774, -1.9446,  0.8272,  0.4591], grad_fn=<SliceBackward>)

get glove embedding<br> 
https://stackoverflow.com/questions/37793118/load-pretrained-glove-vectors-in-python

# Try

In [5]:
import json
with open('dataset/' + 'sy_1' + '.json') as f: 
    (train_data, _to_ix, 
    validate_data, test_data) = json.load(f)

In [6]:
# with open('dataset/' + 'sy_1_small' + '.json', 'w') as f: 
#     json.dump((train_data[:5000], _to_ix, 
#                validate_data[:2000], test_data), f)

In [None]:
train_data[0]

In [10]:
def show(a, b):
    return a, b

In [11]:
show(1, show(2, 3))

(1, (2, 3))

In [6]:
a

2

In [10]:
from pandas.core.common import flatten
for x in flatten([1, [2, 3, [4, 5]]]):
    print(x)

1
2
3
4
5


In [11]:
from dataset import *

In [12]:
from create_dataset import *

ModuleNotFoundError: No module named 'ssg'