In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

Mon Apr 12 06:42:07 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.67       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   46C    P8     9W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
import numpy as np
import torch
import torch.nn as nn
from torch.nn import Parameter
import torch.optim as optim

from typing import *
from pathlib import Path
DATA_ROOT = Path("../data/brown")
N_EPOCHS = 210
from enum import IntEnum
class Dim(IntEnum):
    """Named axis indices for tensors laid out as (batch, sequence, feature)."""
    # Members are numbered 0, 1, 2 in the order listed.
    batch, seq, feature = range(3)

In [None]:
# Only run this cell when the notebook is executed on Google Colaboratory.
# It installs the pinned AllenNLP release that the later `allennlp.*` imports
# rely on, then loads the TensorBoard notebook extension needed by the
# `%tensorboard` cell further down.
!pip install allennlp==0.8.0
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [None]:
class NaiveLSTM(nn.Module):
    """Single-layer LSTM written out gate by gate with plain tensor operations.

    Every gate owns one weight matrix of shape
    ``(input_sz + hidden_sz, hidden_sz)`` that is applied to the concatenation
    ``[h_{t-1}, x_t]`` (hidden state first, input second), plus a bias vector
    of shape ``(hidden_sz,)``.

    Args:
        input_sz: number of features in each input time step.
        hidden_sz: size of the hidden and cell state; this is also the feature
            size of the returned sequence.
    """

    def __init__(self, input_sz: int, hidden_sz: int):
        super().__init__()
        self.input_size = input_sz
        self.hidden_size = hidden_sz
        concat_sz = input_sz + hidden_sz
        # torch.empty only allocates storage; init_weights() fills in the values.
        # forget gate
        self.Wf = Parameter(torch.empty(concat_sz, hidden_sz))
        self.bf = Parameter(torch.empty(hidden_sz))
        # input gate
        self.Wi = Parameter(torch.empty(concat_sz, hidden_sz))
        self.bi = Parameter(torch.empty(hidden_sz))
        # candidate memory cell
        self.Wc = Parameter(torch.empty(concat_sz, hidden_sz))
        self.bc = Parameter(torch.empty(hidden_sz))
        # output gate
        self.Wo = Parameter(torch.empty(concat_sz, hidden_sz))
        self.bo = Parameter(torch.empty(hidden_sz))

        self.init_weights()

    def init_weights(self):
        """Xavier-uniform initialise every matrix and zero every bias vector."""
        for p in self.parameters():
            if p.dim() >= 2:
                nn.init.xavier_uniform_(p)
            else:
                nn.init.zeros_(p)

    def forward(self, x: torch.Tensor,
                init_states: Optional[Tuple[torch.Tensor, torch.Tensor]]=None
               ) -> Tuple[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
        """Run the LSTM cell over every time step of ``x``.

        Only PyTorch operations are used, so autograd provides the backward pass.

        Args:
            x: input of shape (batch, sequence, feature).
            init_states: optional ``(h_0, C_0)`` pair, each of shape
                (batch, hidden_size). Zeros are used when omitted.

        Returns:
            ``(hidden_seq, (h_n, C_n))`` where ``hidden_seq`` has shape
            (batch, sequence, hidden_size) and ``h_n`` / ``C_n`` are the hidden
            and cell state after the last time step.
        """
        batch_sz, seq_sz, _ = x.size()
        # ht and Ct hold the previous state on entry to each loop iteration
        # and the freshly computed state on exit.
        if init_states is None:
            # new_zeros inherits dtype and device from x, so the states are
            # created directly where the input lives and also work for
            # non-float32 inputs.
            ht = x.new_zeros(batch_sz, self.hidden_size)
            Ct = x.new_zeros(batch_sz, self.hidden_size)
        else:
            ht, Ct = init_states

        hidden_seq = []
        for t in range(seq_sz):  # iterate over the time steps
            xt = x[:, t, :]
            hx_concat = torch.cat((ht, xt), dim=1)

            ### The LSTM cell
            ft = torch.sigmoid(hx_concat @ self.Wf + self.bf)
            it = torch.sigmoid(hx_concat @ self.Wi + self.bi)
            Ct_candidate = torch.tanh(hx_concat @ self.Wc + self.bc)
            ot = torch.sigmoid(hx_concat @ self.Wo + self.bo)
            # new cell state and hidden state
            Ct = ft * Ct + it * Ct_candidate
            ht = ot * torch.tanh(Ct)
            ###

            hidden_seq.append(ht)

        if hidden_seq:
            # Stack along the sequence axis (dim 1): (batch, sequence, hidden).
            hidden_seq = torch.stack(hidden_seq, dim=1)
        else:
            # Zero-length sequence: nothing to stack, return an empty output.
            hidden_seq = x.new_zeros(batch_sz, 0, self.hidden_size)
        return hidden_seq, (ht, Ct)

# Smoke test: push one random batch through NaiveLSTM and look at the output shape.
# hidden_sz is also the per-step output size of the LSTM.
batch_sz = 5
seq_len = 10
feat_sz = 32
hidden_sz = 16
arr = torch.randn(batch_sz, seq_len, feat_sz)
lstm = NaiveLSTM(feat_sz, hidden_sz)
ht, final_state = lstm(arr)
hn, cn = final_state
ht.shape  # expected: batch_sz x seq_len x hidden_sz = 5x10x16

torch.Size([5, 10, 16])

In [None]:
# Create the data directory (no error if it already exists) and download the
# corpus text file into it as brown.txt. The {...} parts are IPython
# interpolation of Python expressions into the shell command line.
!mkdir -p {DATA_ROOT}
!curl https://raw.githubusercontent.com/duump/dsc/main/brown20K.txt -o {DATA_ROOT / "brown.txt"}

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0100 1117k  100 1117k    0     0  20.5M      0 --:--:-- --:--:-- --:--:-- 20.5M


In [None]:
from allennlp.data.dataset_readers import LanguageModelingReader
from allennlp.data.tokenizers import CharacterTokenizer
from allennlp.data.token_indexers import SingleIdTokenIndexer
from allennlp.data import Vocabulary
from allennlp.data.iterators import BasicIterator
from allennlp.training import Trainer
from sklearn.model_selection import train_test_split

# Tokenizer handed to the reader (constructed with lowercase_characters=True).
char_tokenizer = CharacterTokenizer(lowercase_characters=True)

# Language-modelling reader: 500 tokens per instance, indexed under the
# "tokens" key that the model's embedder looks up.
reader = LanguageModelingReader(
    tokenizer=char_tokenizer,
    tokens_per_instance=500,
    token_indexers={"tokens": SingleIdTokenIndexer()},
)

# Read the whole corpus once, then hold out 10% of the instances for
# validation; the fixed random_state keeps the split reproducible.
corpus_path = DATA_ROOT / "brown.txt"
all_instances = reader.read(corpus_path)
train_ds, val_ds = train_test_split(all_instances, test_size=0.1, random_state=0)

# The vocabulary is built from the training split only.
vocab = Vocabulary.from_instances(train_ds)

# Batches of 32 instances, indexed with that vocabulary.
iterator = BasicIterator(batch_size=32)
iterator.index_with(vocab)

0it [00:00, ?it/s]04/12/2021 06:42:55 - INFO - allennlp.data.dataset_readers.language_modeling -   Creating dataset from all text in file: ../data/brown/brown.txt

100%|██████████| 2215/2215 [00:00<00:00, 69801.07it/s]
2215it [00:02, 869.90it/s]
04/12/2021 06:42:55 - INFO - allennlp.data.vocabulary -   Fitting token dictionary from dataset.
100%|██████████| 1993/1993 [00:00<00:00, 2390.14it/s]


In [None]:
def train(model: nn.Module, epochs: int, log_dir: str, patience: int = 7) -> "Trainer":
    """Build an AllenNLP ``Trainer`` for ``model`` without starting training.

    Despite its name this function only constructs the trainer; the caller
    starts the run with ``trainer.train()``. The data comes from the
    notebook-level globals ``iterator``, ``train_ds`` and ``val_ds``.

    Args:
        model: the model to optimise. It is moved to the GPU when CUDA is
            available.
        epochs: value passed to the trainer as ``num_epochs``.
        log_dir: value passed to the trainer as ``serialization_dir``.
        patience: value passed to the trainer as its early-stopping
            ``patience``. Defaults to 7, the value that used to be hard-coded.

    Returns:
        The configured ``Trainer`` instance.
    """
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        # Move the model before the optimizer is created from its parameters.
        model = model.cuda()
    trainer = Trainer(
        patience=patience,
        histogram_interval=10,
        summary_interval=10,
        serialization_dir=log_dir,
        model=model,
        optimizer=optim.Adam(model.parameters()),
        iterator=iterator,
        train_dataset=train_ds,
        validation_dataset=val_ds,
        num_epochs=epochs,
        cuda_device=0 if use_cuda else -1,
    )
    return trainer

In [None]:
from allennlp.modules.seq2seq_encoders import PytorchSeq2SeqWrapper
from allennlp.modules.token_embedders import Embedding
from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder
from allennlp.models import Model
from allennlp.nn.util import get_text_field_mask

class LanguageModel(Model):
    """Language model: token embedding -> recurrent encoder -> vocabulary logits.

    Args:
        encoder: recurrent module. It must expose a ``hidden_size`` attribute
            and, when called on the embedded input, return a tuple whose first
            element is the sequence of hidden states.
        vocab: vocabulary that defines the number of output classes and the
            padding index.
        embedding_dim: size of the token embeddings fed to ``encoder``.
    """

    def __init__(self, encoder: nn.Module, vocab: Vocabulary,
                 embedding_dim: int=50):
        super().__init__(vocab=vocab)
        # char embedding
        self.vocab_size = vocab.get_vocab_size()
        self.padding_idx = vocab.get_token_index("@@PADDING@@")
        token_embedding = Embedding(
            num_embeddings=self.vocab_size,
            embedding_dim=embedding_dim,
            padding_index=self.padding_idx,
        )
        self.embedding = BasicTextFieldEmbedder({"tokens": token_embedding})
        self.encoder = encoder
        self.projection = nn.Linear(self.encoder.hidden_size, self.vocab_size)
        # Positions whose target is the padding index do not contribute to the loss.
        self.loss = nn.CrossEntropyLoss(ignore_index=self.padding_idx)

    def forward(self, input_tokens: Dict[str, torch.Tensor],
                output_tokens: Optional[Dict[str, torch.Tensor]] = None
               ) -> Dict[str, Optional[torch.Tensor]]:
        """Compute per-position vocabulary logits and, if targets are given, the loss.

        Args:
            input_tokens: text-field tensors for the input tokens, as accepted
                by the text-field embedder.
            output_tokens: optional target text-field tensors; the ids are read
                from its ``"tokens"`` entry. When omitted, no loss is computed.

        Returns:
            A dict with ``"logits"`` (the projected encoder output) and
            ``"loss"`` (the cross-entropy loss, or ``None`` without targets).
        """
        embs = self.embedding(input_tokens)
        encoded, _ = self.encoder(embs)
        logits = self.projection(encoded)
        loss = None
        if output_tokens is not None:
            # Flatten every position into one row of logits / one target id.
            # reshape (unlike view) also accepts non-contiguous tensors.
            loss = self.loss(logits.reshape(-1, self.vocab_size),
                             output_tokens["tokens"].flatten())
        return {"loss": loss, "logits": logits}

LSTM PYTORCH

In [None]:
# Language model on top of the hand-written NaiveLSTM: input size 50 (the
# model's default embedding size), hidden size 125. Output goes to ./run/lstm.
naive_encoder = NaiveLSTM(50, 125)
lm_naive = LanguageModel(naive_encoder, vocab)
LSTM_trainer = train(model=lm_naive, epochs=N_EPOCHS, log_dir="./run/lstm")
LSTM_trainer.train()

04/12/2021 02:53:19 - INFO - allennlp.training.trainer -   Beginning training.
04/12/2021 02:53:19 - INFO - allennlp.training.trainer -   Epoch 0/209
04/12/2021 02:53:19 - INFO - allennlp.training.trainer -   Peak CPU memory usage MB: 3465.836
04/12/2021 02:53:19 - INFO - allennlp.training.trainer -   GPU 0 memory usage MB: 1062
04/12/2021 02:53:19 - INFO - allennlp.training.trainer -   Training
loss: 3.2862 ||: 100%|██████████| 63/63 [00:27<00:00,  2.30it/s]
04/12/2021 02:53:46 - INFO - allennlp.training.trainer -   Validating
loss: 2.9792 ||: 100%|██████████| 7/7 [00:00<00:00,  7.49it/s]
04/12/2021 02:53:47 - INFO - allennlp.training.trainer -                       Training |  Validation
04/12/2021 02:53:47 - INFO - allennlp.training.trainer -   cpu_memory_MB   |  3465.836  |       N/A
04/12/2021 02:53:47 - INFO - allennlp.training.trainer -   gpu_0_memory_MB |  1062.000  |       N/A
04/12/2021 02:53:47 - INFO - allennlp.training.trainer -   loss            |     3.286  |     2.979
0

{'best_epoch': 209,
 'best_validation_loss': 1.5146212066922868,
 'epoch': 209,
 'peak_cpu_memory_MB': 3524.168,
 'peak_gpu_0_memory_MB': 1170,
 'training_cpu_memory_MB': 3524.168,
 'training_duration': '01:39:10',
 'training_epochs': 209,
 'training_gpu_0_memory_MB': 1170,
 'training_loss': 1.4292204663867043,
 'training_start_epoch': 0,
 'validation_loss': 1.5146212066922868}

OFFICIAL LSTM

In [None]:
# Baseline: the same language model, but with PyTorch's built-in nn.LSTM
# (batch-first, input size 50, hidden size 125). Output goes to ./run/officiallstm.
official_encoder = nn.LSTM(50, 125, batch_first=True)
lm_comparison = LanguageModel(official_encoder, vocab)
official_LSTM = train(model=lm_comparison, epochs=N_EPOCHS, log_dir="./run/officiallstm")
official_LSTM.train()

04/12/2021 04:32:34 - INFO - allennlp.training.trainer -   Beginning training.
04/12/2021 04:32:34 - INFO - allennlp.training.trainer -   Epoch 0/209
04/12/2021 04:32:34 - INFO - allennlp.training.trainer -   Peak CPU memory usage MB: 3526.728
04/12/2021 04:32:34 - INFO - allennlp.training.trainer -   GPU 0 memory usage MB: 1172
04/12/2021 04:32:34 - INFO - allennlp.training.trainer -   Training
loss: 3.2745 ||: 100%|██████████| 63/63 [00:02<00:00, 21.04it/s]
04/12/2021 04:32:37 - INFO - allennlp.training.trainer -   Validating
loss: 2.9725 ||: 100%|██████████| 7/7 [00:00<00:00, 49.96it/s]
04/12/2021 04:32:37 - INFO - allennlp.training.trainer -                       Training |  Validation
04/12/2021 04:32:37 - INFO - allennlp.training.trainer -   cpu_memory_MB   |  3526.728  |       N/A
04/12/2021 04:32:37 - INFO - allennlp.training.trainer -   gpu_0_memory_MB |  1172.000  |       N/A
04/12/2021 04:32:37 - INFO - allennlp.training.trainer -   loss            |     3.274  |     2.972
0

{'best_epoch': 209,
 'best_validation_loss': 1.5052171775272913,
 'epoch': 209,
 'peak_cpu_memory_MB': 3530.812,
 'peak_gpu_0_memory_MB': 1234,
 'training_cpu_memory_MB': 3530.812,
 'training_duration': '00:11:30',
 'training_epochs': 209,
 'training_gpu_0_memory_MB': 1234,
 'training_loss': 1.4200365978573997,
 'training_start_epoch': 0,
 'validation_loss': 1.5052171775272913}

MOG LSTM

In [None]:
class MogLSTM(nn.Module):
    """Single-layer LSTM whose input and previous hidden state gate each other
    ("mogrification") before the usual LSTM update.

    The four gates share two fused weight matrices, ``Wih`` of shape
    ``(input_sz, 4 * hidden_sz)`` and ``Whh`` of shape
    ``(hidden_sz, 4 * hidden_sz)``, with matching biases ``bih`` and ``bhh``.
    ``Q`` (``hidden_sz x input_sz``) and ``R`` (``input_sz x hidden_sz``) are
    the mogrifier projections; the same ``Q`` and ``R`` are reused in every
    mogrification round.
    NOTE(review): the Mogrifier LSTM paper describes separate matrices per
    round - confirm that sharing them is intended.

    Args:
        input_sz: number of features in each input time step.
        hidden_sz: size of the hidden and cell state; this is also the feature
            size of the returned sequence.
        mog_iterations: number of mogrification rounds applied at every time
            step (0 disables mogrification).
    """

    def __init__(self, input_sz: int, hidden_sz: int, mog_iterations: int):
        super().__init__()
        self.input_size = input_sz
        self.hidden_size = hidden_sz
        self.mog_iterations = mog_iterations
        # torch.empty only allocates storage; init_weights() fills in the values.
        self.Wih = Parameter(torch.empty(input_sz, hidden_sz * 4))
        self.Whh = Parameter(torch.empty(hidden_sz, hidden_sz * 4))
        self.bih = Parameter(torch.empty(hidden_sz * 4))
        self.bhh = Parameter(torch.empty(hidden_sz * 4))
        # Mogrifiers
        self.Q = Parameter(torch.empty(hidden_sz, input_sz))
        self.R = Parameter(torch.empty(input_sz, hidden_sz))

        self.init_weights()

    def init_weights(self):
        """Xavier-uniform initialise every matrix and zero every bias vector."""
        for p in self.parameters():
            if p.dim() >= 2:
                nn.init.xavier_uniform_(p)
            else:
                nn.init.zeros_(p)

    def mogrify(self, xt, ht):
        """Alternately rescale ``xt`` and ``ht`` for ``mog_iterations`` rounds.

        Odd rounds (1, 3, ...) rescale ``xt`` by ``2 * sigmoid(ht @ Q)``;
        even rounds rescale ``ht`` by ``2 * sigmoid(xt @ R)``.

        Returns:
            The modulated ``(xt, ht)`` pair.
        """
        for i in range(1, self.mog_iterations + 1):
            if i % 2 == 0:
                ht = (2 * torch.sigmoid(xt @ self.R)) * ht
            else:
                xt = (2 * torch.sigmoid(ht @ self.Q)) * xt
        return xt, ht

    def forward(self, x: torch.Tensor,
                init_states: Optional[Tuple[torch.Tensor, torch.Tensor]]=None
               ) -> Tuple[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
        """Run the mogrified LSTM cell over every time step of ``x``.

        Only PyTorch operations are used, so autograd provides the backward pass.

        Args:
            x: input of shape (batch, sequence, feature).
            init_states: optional ``(h_0, C_0)`` pair, each of shape
                (batch, hidden_size). Zeros are used when omitted.

        Returns:
            ``(hidden_seq, (h_n, C_n))`` where ``hidden_seq`` has shape
            (batch, sequence, hidden_size) and ``h_n`` / ``C_n`` are the hidden
            and cell state after the last time step.
        """
        batch_sz, seq_sz, _ = x.size()
        # ht and Ct hold the previous state on entry to each loop iteration
        # and the freshly computed state on exit.
        if init_states is None:
            # new_zeros inherits dtype and device from x, so the states are
            # created directly where the input lives and also work for
            # non-float32 inputs.
            ht = x.new_zeros(batch_sz, self.hidden_size)
            Ct = x.new_zeros(batch_sz, self.hidden_size)
        else:
            ht, Ct = init_states

        hidden_seq = []
        for t in range(seq_sz):  # iterate over the time steps
            xt = x[:, t, :]
            xt, ht = self.mogrify(xt, ht)  # mogrification
            gates = (xt @ self.Wih + self.bih) + (ht @ self.Whh + self.bhh)
            # The fused projection is split into four hidden_size-wide chunks.
            ingate, forgetgate, cellgate, outgate = gates.chunk(4, 1)

            ### The LSTM cell
            ft = torch.sigmoid(forgetgate)
            it = torch.sigmoid(ingate)
            Ct_candidate = torch.tanh(cellgate)
            ot = torch.sigmoid(outgate)
            # new cell state and hidden state
            Ct = (ft * Ct) + (it * Ct_candidate)
            ht = ot * torch.tanh(Ct)
            ###

            hidden_seq.append(ht)

        if hidden_seq:
            # Stack along the sequence axis (dim 1): (batch, sequence, hidden).
            hidden_seq = torch.stack(hidden_seq, dim=1)
        else:
            # Zero-length sequence: nothing to stack, return an empty output.
            hidden_seq = x.new_zeros(batch_sz, 0, self.hidden_size)
        return hidden_seq, (ht, Ct)

# Sanity test for MogLSTM: push one random batch through the layer and check
# the output shape. (This cell used to instantiate NaiveLSTM, so MogLSTM was
# never exercised before training.)
# Note that hidden_sz is also the output size of each LSTM cell.
batch_sz, seq_len, feat_sz, hidden_sz = 5, 10, 32, 16
arr = torch.randn(batch_sz, seq_len, feat_sz)
lstm = MogLSTM(feat_sz, hidden_sz, 5)  # 5 mogrification rounds, as in the training cell
ht, (hn, cn) = lstm(arr)
ht.shape #shape should be batch_sz x seq_len x hidden_sz = 5x10x16

torch.Size([5, 10, 16])

In [None]:

# Language model on top of MogLSTM: input size 50 (the model's default
# embedding size), hidden size 125, 5 mogrification rounds per time step.
# Output goes to ./run/mog2.
mog_encoder = MogLSTM(50, 125, 5)
lm_mog = LanguageModel(mog_encoder, vocab)
mog_LSTM = train(model=lm_mog, epochs=N_EPOCHS, log_dir="./run/mog2")
mog_LSTM.train()

04/12/2021 06:43:22 - INFO - allennlp.training.trainer -   Beginning training.
04/12/2021 06:43:22 - INFO - allennlp.training.trainer -   Epoch 0/209
04/12/2021 06:43:22 - INFO - allennlp.training.trainer -   Peak CPU memory usage MB: 3517.1
04/12/2021 06:43:22 - INFO - allennlp.training.trainer -   GPU 0 memory usage MB: 1062
04/12/2021 06:43:22 - INFO - allennlp.training.trainer -   Training
loss: 3.2272 ||: 100%|██████████| 63/63 [00:46<00:00,  1.34it/s]
04/12/2021 06:44:09 - INFO - allennlp.training.trainer -   Validating
loss: 2.7604 ||: 100%|██████████| 7/7 [00:01<00:00,  4.61it/s]
04/12/2021 06:44:11 - INFO - allennlp.training.trainer -                       Training |  Validation
04/12/2021 06:44:11 - INFO - allennlp.training.trainer -   loss            |     3.227  |     2.760
04/12/2021 06:44:11 - INFO - allennlp.training.trainer -   cpu_memory_MB   |  3517.100  |       N/A
04/12/2021 06:44:11 - INFO - allennlp.training.trainer -   gpu_0_memory_MB |  1062.000  |       N/A
04/

{'best_epoch': 132,
 'best_validation_loss': 1.502693738256182,
 'epoch': 138,
 'peak_cpu_memory_MB': 3539.872,
 'peak_gpu_0_memory_MB': 1236,
 'training_cpu_memory_MB': 3539.872,
 'training_duration': '01:48:42',
 'training_epochs': 138,
 'training_gpu_0_memory_MB': 1236,
 'training_loss': 1.3942242312052893,
 'training_start_epoch': 0,
 'validation_loss': 1.503074049949646}

Visualisasi

In [None]:

# Open TensorBoard inline on ./run/mog2, the serialization directory of the
# MogLSTM training run. Requires the extension loaded by `%load_ext tensorboard`.
#import matplotlib.pyplot as plt
#%matplotlib inline
%tensorboard --logdir "./run/mog2"

ERROR: Failed to launch TensorBoard (exited with 1).
Contents of stderr:
2021-04-12 08:33:49.177402: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
Traceback (most recent call last):
  File "/usr/local/bin/tensorboard", line 8, in <module>
    sys.exit(run_main())
  File "/usr/local/lib/python3.7/dist-packages/tensorboard/main.py", line 65, in run_main
    default.get_plugins(),
  File "/usr/local/lib/python3.7/dist-packages/tensorboard/default.py", line 108, in get_plugins
    return get_static_plugins() + get_dynamic_plugins()
  File "/usr/local/lib/python3.7/dist-packages/tensorboard/default.py", line 146, in get_dynamic_plugins
    "tensorboard_plugins"
  File "/usr/local/lib/python3.7/dist-packages/tensorboard/default.py", line 145, in <listcomp>
    for entry_point in pkg_resources.iter_entry_points(
  File "/usr/local/lib/python3.7/dist-packages/pkg_resources/__init__.py", line 2449, in load
    self.require(

In [None]:

# Zip the results: recursively pack everything under ./run (per-epoch metrics
# plus model and training-state checkpoints) into all20k.zip.
!zip -r all20k.zip ./run

  adding: run/ (stored 0%)
  adding: run/mog2/ (stored 0%)
  adding: run/mog2/metrics_epoch_134.json (deflated 55%)
  adding: run/mog2/metrics_epoch_34.json (deflated 56%)
  adding: run/mog2/metrics_epoch_28.json (deflated 56%)
  adding: run/mog2/metrics_epoch_16.json (deflated 56%)
  adding: run/mog2/training_state_epoch_133.th (deflated 7%)
  adding: run/mog2/model_state_epoch_122.th (deflated 8%)
  adding: run/mog2/training_state_epoch_123.th (deflated 7%)
  adding: run/mog2/metrics_epoch_99.json (deflated 55%)
  adding: run/mog2/metrics_epoch_30.json (deflated 56%)
  adding: run/mog2/training_state_epoch_122.th (deflated 7%)
  adding: run/mog2/training_state_epoch_136.th (deflated 7%)
  adding: run/mog2/model_state_epoch_119.th (deflated 8%)
  adding: run/mog2/metrics_epoch_82.json (deflated 56%)
  adding: run/mog2/metrics_epoch_32.json (deflated 56%)
  adding: run/mog2/metrics_epoch_124.json (deflated 55%)
  adding: run/mog2/metrics_epoch_59.json (deflated 57%)
  adding: run/mog2/

In [None]:
# Mount Google Drive at /content/drive so that files can be copied between the
# Colaboratory runtime and Drive (Colab only).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:

# Copy the zipped results into the mounted Google Drive ("My Drive" folder).
!cp all20k.zip /content/drive/My\ Drive/all20k.zip