In [1]:
import torch
import torch.nn.functional as F
import torch.optim as optim

import sys
import numpy as np
import matplotlib.pyplot as plt
import os
from tqdm import tqdm_notebook as tqdm

sys.path.append('../')
from shared.models.basic_lstm import BasicLSTM
from shared.process.pa4_dataloader import build_all_loaders
from shared.process.PA4Trainer import get_computing_device, PA4Trainer

In [2]:
import torch
import torch.nn as nn
import torch.nn.init as init

class MultiLayerLSTM(nn.Module):
    """
    Stacked LSTM followed by a linear read-out layer.

    forward() returns raw logits (no softmax), so use it with the criterion
    nn.CrossEntropyLoss().

    The recurrent state is stored on the module as ``self.hidden`` and is fed back
    into the LSTM on every forward() call, so it carries over between consecutive
    chunks. Use ``reset_hidden`` to start again from zeros and ``detach_hidden`` to
    cut the autograd graph between chunks.

    NOTE: The state helpers default to a single sequence per batch (batch_size=1).
    """

    def __init__(self, num_input, num_hidden, num_layers, num_output):
        """
        Args:
            num_input: size of each input feature vector.
            num_hidden: number of hidden units in every LSTM layer.
            num_layers: number of stacked LSTM layers.
            num_output: number of output classes (width of the logits).
        """
        super(MultiLayerLSTM, self).__init__()

        self.num_hidden = num_hidden
        self.num_layers = num_layers
        self.lstm = nn.LSTM(num_input, num_hidden, num_layers=num_layers, batch_first=True)
        self.h2o = nn.Linear(num_hidden, num_output)

        # No state yet. nn.LSTM treats a missing (None) state as all zeros, so
        # forward() works even if reset_hidden() was never called.
        self.hidden = None

        init.xavier_normal_(self.h2o.weight)

    def forward(self, input):
        """
        Run one chunk through the LSTM and project every time step to logits.

        Args:
            input: tensor with shape [batch_size, chunk_size, num_input],
                e.g. [1, chunk_size, num_input].

        Returns:
            Logits with shape [batch_size * chunk_size, num_output].
        """
        # input + hidden to hidden; the new state is kept for the next call
        lstm_out, self.hidden = self.lstm(input, self.hidden)

        # Flatten batch and time: (batch_size * chunk_size, num_hidden)
        linear_input = lstm_out.contiguous().view(-1, self.num_hidden)
        logits = self.h2o(linear_input)
        return logits

    def detach_hidden(self):
        """Detach the stored state from the autograd graph; no-op when there is no state."""
        if self.hidden is None:
            return
        self.hidden = tuple(state.detach() for state in self.hidden)

    def reset_hidden(self, computing_device, batch_size=1):
        """Replace the stored state with zeros created on ``computing_device``."""
        self.hidden = self.init_hidden(computing_device, batch_size)

    # Helper to init hidden state
    def init_hidden(self, computing_device=None, batch_size=1):
        """
        Build a zero (h_0, c_0) pair.

        Args:
            computing_device: device to allocate on; None uses torch's default device.
            batch_size: number of sequences per batch (defaults to 1).

        Returns:
            Tuple of two tensors, each with shape (num_layers, batch_size, num_hidden).
        """
        # nn.LSTM expects (num_layers, batch_size, num_hidden) for its state,
        # even when batch_first=True.
        shape = (self.num_layers, batch_size, self.num_hidden)
        # Allocate directly on the target device instead of CPU + .to(); this also
        # handles the CUDA index 0, which a plain truthiness check would skip.
        return (torch.zeros(shape, device=computing_device),
                torch.zeros(shape, device=computing_device))

In [3]:
# Seed the RNGs before anything stochastic (weight init, and any shuffling the
# loaders may do) so that Restart & Run All gives repeatable results.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

computing_device = get_computing_device()
all_loaders, infos = build_all_loaders('../pa4Data/')

# Vocabulary lookups returned alongside the loaders
# (presumably char -> index and index -> char; verify against build_all_loaders).
char2ind = infos['char_2_index']
ind2char = infos['index_2_char']

# Hyper-parameters
LEARNING_RATE = 0.01

# Input and output widths both equal the number of entries in char2ind.
INPUT_SIZE = len(char2ind)
HIDDEN_SIZE = 100
NUM_LAYERS = 2
OUTPUT_SIZE = len(char2ind)

model = MultiLayerLSTM(INPUT_SIZE, HIDDEN_SIZE, NUM_LAYERS, OUTPUT_SIZE)
# The model outputs raw logits, which is what CrossEntropyLoss expects.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [4]:
# Run settings handed to PA4Trainer; kept in a named dict so they are easy to
# inspect or tweak before re-running the cell.
trainer_config = {
    'path_to_save_result': './exp',
    'session_name': 'test_multilayer_lstm',
    'n_epochs': 200,
    'print_every_n_epochs': 5,
    'validate_every_v_epochs': 5,
    'verbose': True,
    'num_epochs_no_improvement_early_stop': 3,
    'use_early_stop': True,
    'pass_hidden_states_between_epochs': False,
}

trainer = PA4Trainer(model, criterion, optimizer, all_loaders, trainer_config)
trainer.start()

Coverted model to device: cuda
-----------
Trainer Config:
{   'criterion': 'CrossEntropyLoss()',
    'model': 'MultiLayerLSTM(\n'
             '  (lstm): LSTM(93, 100, num_layers=2, batch_first=True)\n'
             '  (h2o): Linear(in_features=100, out_features=93, bias=True)\n'
             ')',
    'n_epochs': 200,
    'num_epochs_no_improvement_early_stop': 3,
    'optimizer': 'Adam (\n'
                 'Parameter Group 0\n'
                 '    amsgrad: False\n'
                 '    betas: (0.9, 0.999)\n'
                 '    eps: 1e-08\n'
                 '    lr: 0.01\n'
                 '    weight_decay: 0\n'
                 ')',
    'pass_hidden_states_between_epochs': False,
    'path_to_save_result': './exp',
    'print_every_n_epochs': 5,
    'session_name': 'test_multilayer_lstm',
    'use_early_stop': True,
    'validate_every_v_epochs': 5,
    'verbose': True}
-----------
Start training...
Epoch 0, validation loss: 2.7401894261218884
Epoch 0, 0% (0m 33s) train los

Process Process-7:
KeyboardInterrupt
Traceback (most recent call last):
  File "/opt/conda/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/opt/conda/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/opt/conda/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 52, in _worker_loop
    r = index_queue.get()
  File "/opt/conda/lib/python3.6/multiprocessing/queues.py", line 335, in get
    res = self._reader.recv_bytes()
  File "/opt/conda/lib/python3.6/multiprocessing/connection.py", line 216, in recv_bytes
    buf = self._recv_bytes(maxlength)
  File "/opt/conda/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)
  File "/opt/conda/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)


KeyboardInterrupt: 