In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.nn.utils.rnn import PackedSequence
from torch.nn.parameter import Parameter

import math
import numpy as np
from collections import OrderedDict

import neural_cls
from neural_cls.util.utils import *
from neural_cls.util import Trainer, Loader, Initializer

In [2]:
class RNNBase_BB(nn.Module):
    """Recurrent layer whose weights are sampled from a learned Gaussian.

    Every weight matrix and bias vector is represented by two Parameters:
    a mean (``*_mu``) and a log-variance (``*_logvar``).  In training mode
    each forward pass draws a fresh weight sample and records the log prior
    (``self.lpw``) and log posterior (``self.lqw``) of that sample; in eval
    mode the means are used directly and ``lpw``/``lqw`` are left untouched.

    NOTE(review): ``forward`` relies on ``self._backend.RNN``, the functional
    RNN backend exposed by ``nn.Module`` in the PyTorch release this notebook
    was written against -- confirm it exists before upgrading PyTorch.

    Args:
        mode: cell type; 'LSTM' uses 4 gates, 'GRU' 3, anything else 1.
        input_size: feature size of the input to the first layer.
        hidden_size: hidden state size per direction.
        sigma_prior: standard deviation passed to ``log_gaussian`` for the
            prior; also used as the std of the sampling noise in ``sample``.
        num_layers: number of stacked layers.
        batch_first: forwarded unchanged to the backend RNN.
        dropout: forwarded unchanged to the backend RNN.
        bidirectional: if True, two parameter sets are created per layer.
    """

    def __init__(self, mode, input_size, hidden_size, sigma_prior,
                 num_layers=1, batch_first=False,
                 dropout=0, bidirectional=True):

        super(RNNBase_BB, self).__init__()

        self.mode = mode
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.batch_first = batch_first
        self.dropout = dropout
        self.dropout_state = {}
        self.bidirectional = bidirectional
        num_directions = 2 if bidirectional else 1
        self.num_directions = num_directions
        self.sampled_weights = []
        self.sigma_prior = sigma_prior

        if mode == 'LSTM':
            gate_size = 4 * hidden_size
        elif mode == 'GRU':
            gate_size = 3 * hidden_size
        else:
            gate_size = hidden_size

        # Flat lists, four entries per (layer, direction) in the order
        # [w_ih, w_hh, b_ih, b_hh]; means[i] pairs with logvars[i].
        self.means = []
        self.logvars = []

        for layer in range(num_layers):
            for direction in range(num_directions):
                # Layers above the first consume the concatenated directions.
                layer_input_size = input_size if layer == 0 else hidden_size * num_directions

                w_ih_mu = Parameter(torch.Tensor(gate_size, layer_input_size))
                w_hh_mu = Parameter(torch.Tensor(gate_size, hidden_size))
                w_ih_logvar = Parameter(torch.Tensor(gate_size, layer_input_size))
                w_hh_logvar = Parameter(torch.Tensor(gate_size, hidden_size))

                b_ih_mu = Parameter(torch.Tensor(gate_size))
                b_hh_mu = Parameter(torch.Tensor(gate_size))
                b_ih_logvar = Parameter(torch.Tensor(gate_size))
                b_hh_logvar = Parameter(torch.Tensor(gate_size))

                self.means += [w_ih_mu, w_hh_mu, b_ih_mu, b_hh_mu]
                self.logvars += [w_ih_logvar, w_hh_logvar, b_ih_logvar, b_hh_logvar]

                layer_params = (w_ih_mu,  w_ih_logvar, w_hh_mu, w_hh_logvar, b_ih_mu, b_ih_logvar, b_hh_mu, b_hh_logvar)

                suffix = '_reverse' if direction == 1 else ''
                param_names = ['weight_ih_l_mu{}{}', 'weight_ih_l_logvar{}{}', 'weight_hh_l_mu{}{}', 'weight_hh_l_logvar{}{}']
                param_names += ['bias_ih_l_mu{}{}', 'bias_ih_l_logvar{}{}', 'bias_hh_l_mu{}{}', 'bias_hh_l_logvar{}{}']

                param_names = [x.format(layer, suffix) for x in param_names]

                # setattr registers each Parameter on the module under its
                # name; the plain lists above hold the same objects.
                for name, param in zip(param_names, layer_params):
                    setattr(self, name, param)

        self.reset_parameters()
        self.lpw = 0
        self.lqw = 0

    def reset_parameters(self):
        """Initialise means uniformly in [-stdv, stdv] and log-variances to log(stdv**2),
        where stdv = 1 / sqrt(hidden_size)."""
        stdv = 1.0 / math.sqrt(self.hidden_size)
        logvar_init = math.log(stdv) * 2
        for mean in self.means:
            mean.data.uniform_(-stdv, stdv)
        for logvar in self.logvars:
            logvar.data.fill_(logvar_init)

    def get_all_weights(self, weights):
        """Group a flat weight list into ``[w_ih, w_hh, b_ih, b_hh]`` lists,
        one per (layer, direction), in the order they were created."""
        n_groups = self.num_layers * self.num_directions
        all_weights = []
        for start in range(0, 4 * n_groups, 4):
            all_weights.append([weights[start], weights[start + 1],
                                weights[start + 2], weights[start + 3]])
        return all_weights

    def sample(self, usecuda = True):
        """Draw one weight sample per parameter into ``self.sampled_weights``.

        Each sample is ``mean + eps * exp(0.5 * logvar)``.

        Args:
            usecuda: kept for backward compatibility and no longer consulted.
                The noise tensor is allocated from the parameter itself, so it
                always has the parameter's tensor type and device.  Previously
                the default ``True`` moved the noise to CUDA even when the
                module lived on the CPU, which raised.
        """
        self.sampled_weights = []
        for mean, logvar in zip(self.means, self.logvars):
            # NOTE(review): noise std is sigma_prior rather than 1, while
            # _calculate_posterior passes 0.5*logvar (presumably log-sigma) to
            # log_gaussian_logsigma -- confirm this scaling is intended.
            eps = mean.data.new(mean.size()).normal_(0, self.sigma_prior)
            std = logvar.mul(0.5).exp()
            self.sampled_weights.append(mean + Variable(eps) * std)

    def _calculate_prior(self, weights):
        """Sum ``log_gaussian(w, 0, sigma_prior)`` over all entries of all weights."""
        lpw = 0.
        for w in weights:
            lpw += log_gaussian(w, 0, self.sigma_prior).sum()
        return lpw

    def _calculate_posterior(self, weights):
        """Sum ``log_gaussian_logsigma(w, mean_i, 0.5 * logvar_i)`` over all weights."""
        lqw = 0.
        for i,w in enumerate(weights):
            lqw += log_gaussian_logsigma(w, self.means[i], 0.5*self.logvars[i]).sum()
        return lqw

    def forward(self, input, hx=None, usecuda = True):
        """Run the recurrent layer.

        Args:
            input: a padded batch or a ``PackedSequence``.
            hx: optional initial hidden state; zeros are used when omitted
                (a ``(h, c)`` pair of zeros for mode 'LSTM').
            usecuda: forwarded to ``sample`` (see its note).

        Returns:
            ``(output, hidden)`` as returned by the backend RNN; ``output`` is
            re-wrapped in a ``PackedSequence`` when the input was packed.
        """
        if self.training:
            self.sample(usecuda = usecuda)
            weights = self.sampled_weights
            self.lpw = self._calculate_prior(weights)
            self.lqw = self._calculate_posterior(weights)
        else:
            # Evaluation uses the posterior means; lpw/lqw keep their last values.
            weights = self.means

        self.all_weights = self.get_all_weights(weights)

        is_packed = isinstance(input, PackedSequence)
        if is_packed:
            input, batch_sizes = input
            max_batch_size = batch_sizes[0]
        else:
            batch_sizes = None
            max_batch_size = input.size(0) if self.batch_first else input.size(1)

        if hx is None:
            # Zero state allocated from the input so type/device match it.
            hx = torch.autograd.Variable(input.data.new(self.num_layers *
                                                        self.num_directions,
                                                        max_batch_size,
                                                        self.hidden_size).zero_(), requires_grad=False)
            if self.mode == 'LSTM':
                hx = (hx, hx)

        func = self._backend.RNN(
            self.mode,
            self.input_size,
            self.hidden_size,
            num_layers=self.num_layers,
            batch_first=self.batch_first,
            dropout=self.dropout,
            train=self.training,
            bidirectional=self.bidirectional,
            batch_sizes=batch_sizes,
            dropout_state=self.dropout_state,
            flat_weight=None
        )
        # The sampled (or mean) weights are passed explicitly instead of the
        # module's registered parameters.
        output, hidden = func(input, self.all_weights, hx)
        if is_packed:
            output = PackedSequence(output, batch_sizes)
        return output, hidden


class LSTM_BB(RNNBase_BB):
    """RNNBase_BB with the mode fixed to 'LSTM'; all other arguments pass through."""

    def __init__(self, *args, **kwargs):
        cell_mode = 'LSTM'
        super(LSTM_BB, self).__init__(cell_mode, *args, **kwargs)

class baseRNN_BB(nn.Module):
    """Shared set-up for RNN-based modules built on the Bayes-by-Backprop cells.

    Stores the sizing/dropout hyper-parameters, resolves the recurrent cell
    class from its name and creates the input dropout layer.  Subclasses must
    implement ``forward``.

    Raises:
        ValueError: if ``rnn_cell`` (case-insensitive) is not 'lstm'.
    """

    def __init__(self, vocab_size, hidden_size, input_dropout_p, output_dropout_p, n_layers, rnn_cell, 
                 max_len=25):

        super(baseRNN_BB, self).__init__()

        # Sizing
        self.vocab_size, self.hidden_size = vocab_size, hidden_size
        self.n_layers, self.max_len = n_layers, max_len

        # Dropout probabilities
        self.input_dropout_p, self.output_dropout_p = input_dropout_p, output_dropout_p

        # Only the LSTM variant is available; reject everything else up front.
        if rnn_cell.lower() != 'lstm':
            raise ValueError("Unsupported RNN Cell: {0}".format(rnn_cell))
        self.rnn_cell = LSTM_BB

        self.input_dropout = nn.Dropout(p=input_dropout_p)

    def forward(self, *args, **kwargs):
        """Abstract; concrete subclasses provide the computation."""
        raise NotImplementedError()

class EncoderRNN_BB(baseRNN_BB):
    """Embeds word ids and encodes them with the Bayes-by-Backprop RNN.

    ``forward`` returns the final hidden state of the RNN flattened to one
    row per batch element.
    """

    def __init__(self, vocab_size, embedding_size ,hidden_size, sigma_prior, input_dropout_p=0, 
                 output_dropout_p=0, n_layers=1, bidirectional=True, rnn_cell='lstm'):

        super(EncoderRNN_BB, self).__init__(
            vocab_size, hidden_size, input_dropout_p, output_dropout_p, n_layers, rnn_cell
        )

        self.embedding = nn.Embedding(vocab_size, embedding_size)

        # The cell class was resolved by the base constructor.
        self.rnn = self.rnn_cell(
            embedding_size, hidden_size, sigma_prior, n_layers,
            batch_first=True,
            dropout=output_dropout_p,
            bidirectional=bidirectional,
        )

    def forward(self, words, input_lengths, usecuda = True):
        """Encode a padded batch of word ids.

        Args:
            words: batch of word ids with the batch on dimension 0.
            input_lengths: sequence lengths handed to ``pack_padded_sequence``.
            usecuda: forwarded to the RNN.

        Returns:
            The first element of the RNN's final state, transposed to
            batch-first and reshaped to ``(batch_size, -1)``.
        """
        n_rows = words.size()[0]
        packed = nn.utils.rnn.pack_padded_sequence(
            self.input_dropout(self.embedding(words)),
            input_lengths,
            batch_first= True,
        )
        _, final_state = self.rnn(packed, usecuda = usecuda)
        h_n = final_state[0]
        return h_n.transpose(0,1).contiguous().view(n_rows, -1)

    def get_lpw_lqw(self):
        """Return the ``(lpw, lqw)`` pair currently stored on the RNN."""
        return self.rnn.lpw, self.rnn.lqw

In [3]:
class BiLSTM_BB(nn.Module):
    """Sentence classifier: Bayes-by-Backprop (Bi)LSTM encoder + linear output layer.

    Args:
        word_vocab_size: number of rows in the word embedding table.
        word_embedding_dim: embedding size.
        word_hidden_dim: LSTM hidden size per direction.
        output_size: number of outputs of the final linear layer.
        sigma_prior: prior standard deviation handed to the encoder; also
            used as the std in the ``log_gaussian`` likelihood term.
        pretrained: optional embedding matrix that replaces the embedding
            weights (converted with ``torch.FloatTensor``).
        n_layers: number of LSTM layers.
        bidirectional: whether the encoder runs in both directions.
        dropout_p: dropout probability applied to the encoder features.
    """

    def __init__(self, word_vocab_size, word_embedding_dim, word_hidden_dim, output_size, sigma_prior, 
                 pretrained=None, n_layers = 1, bidirectional = True, dropout_p = 0.5):

        super(BiLSTM_BB, self).__init__()

        self.word_vocab_size = word_vocab_size
        self.word_embedding_dim = word_embedding_dim
        self.word_hidden_dim = word_hidden_dim
        self.sigma_prior = sigma_prior

        self.initializer = Initializer()
        self.loader = Loader()

        self.word_encoder = EncoderRNN_BB(word_vocab_size, word_embedding_dim, word_hidden_dim, 
                                          sigma_prior = sigma_prior, n_layers = n_layers, 
                                          bidirectional = bidirectional)

        if pretrained is not None:
            self.word_encoder.embedding.weight = nn.Parameter(torch.FloatTensor(pretrained))

        self.dropout = nn.Dropout(p=dropout_p)

        # The encoder flattens the final hidden state of every layer and
        # direction into a single feature vector per example.
        hidden_size = 2*n_layers*word_hidden_dim if bidirectional else n_layers*word_hidden_dim
        self.linear = nn.Linear(hidden_size, output_size)
        self.lossfunc = nn.CrossEntropyLoss()

    def forward_pass(self, words, wordslen, usecuda=True):
        """Encoder -> dropout -> linear; returns the raw linear outputs."""
        word_features = self.word_encoder(words, wordslen, usecuda=usecuda)
        word_features = self.dropout(word_features)
        return self.linear(word_features)

    def forward(self, words, tags, tagset_size, wordslen, n_batches, n_samples = 3, usecuda=True):
        """Compute the training loss averaged over ``n_samples`` forward passes.

        Args:
            words: 2-D batch of word ids (batch_size x max_len).
            tags: target class index per example; used as the scatter index
                that builds a one-hot (batch_size x tagset_size) target.
            tagset_size: number of classes in the one-hot target.
            wordslen: sequence lengths for the encoder.
            n_batches: forwarded to ``bayes_loss_function``.
            n_samples: number of forward passes to average over.
            usecuda: whether the one-hot target is created on the GPU; also
                forwarded to the encoder.

        Returns:
            The value of ``bayes_loss_function`` for the averaged log prior,
            log posterior and log likelihood.
        """
        batch_size, max_len = words.size()
        s_log_pw, s_log_qw, s_log_likelihood = 0., 0., 0.

        if usecuda:
            onehottags = Variable(torch.zeros(batch_size, tagset_size)).cuda()
        else:
            onehottags = Variable(torch.zeros(batch_size, tagset_size))
        onehottags.scatter_(1, tags.unsqueeze(1), 1)

        # `range` instead of `xrange`: identical here and valid on Python 3.
        for _ in range(n_samples):
            output = self.forward_pass(words, wordslen, usecuda = usecuda)
            sample_log_pw, sample_log_qw = self.word_encoder.get_lpw_lqw()
            sample_log_likelihood = log_gaussian(onehottags, output, self.sigma_prior).sum() * max_len
            s_log_pw += sample_log_pw
            s_log_qw += sample_log_qw
            s_log_likelihood += sample_log_likelihood

        log_pw, log_qw, log_llh = s_log_pw/n_samples, s_log_qw/n_samples, s_log_likelihood/n_samples
        loss = bayes_loss_function(log_pw, log_qw, log_llh, n_batches, batch_size)

        return loss

    def predict(self, words, wordslen, scoreonly=False, usecuda=True):
        """Score a batch and optionally return the predicted class indices.

        Args:
            words: batch of word ids.
            wordslen: sequence lengths for the encoder.
            scoreonly: if True, return only the scores.
            usecuda: forwarded to the encoder (previously accepted but
                silently ignored, so the encoder always saw its default).

        Returns:
            ``scores`` (numpy array holding the maximum softmax probability
            per example) or ``(scores, prediction)`` where ``prediction`` is a
            list with the arg-max output index per example.
        """
        output = self.forward_pass(words, wordslen, usecuda=usecuda)

        scores = torch.max(F.softmax(output, dim =1), dim=1)[0].data.cpu().numpy()
        if scoreonly:
            return scores

        prediction = torch.max(output, dim=1)[1].data.cpu().numpy().tolist()
        return scores, prediction

In [4]:
# Hyper-parameters for this run, kept in insertion order.
parameters = OrderedDict([
    # Word embeddings
    ('wrdim', 300),                                # embedding dimension
    ('ptrnd', 'wordvectors/glove.6B.300d.txt'),    # pretrained vectors file

    # Model / schedule
    ('dpout', 0.5),                                # dropout probability
    ('wldim', 200),                                # word LSTM hidden size
    ('nepch', 10),                                 # number of epochs

    # Optimisation
    ('lrate', 0.001),                              # learning rate
    ('batch_size', 50),
    ('opsiz', 2),                                  # output size
    ('sigmp', float(np.exp(-3))),                  # prior sigma
])

In [5]:
# Dataset selection; the input and result directories are derived from it.
# NOTE(review): `os` is not imported explicitly in this notebook -- presumably
# it arrives through `from neural_cls.util.utils import *`; confirm.
use_dataset = 'mareview'
loader = Loader()
dataset_path, result_path = (
    os.path.join(root, use_dataset) for root in ('datasets', 'neural_cls/results/')
)

In [6]:
# Pick the loader that matches the selected dataset; anything else is unsupported.
if use_dataset not in ('trec', 'mareview'):
    raise NotImplementedError()

load_dataset = loader.load_trec if use_dataset == 'trec' else loader.load_mareview
train_data, test_data, mappings = load_dataset(
    dataset_path, parameters['ptrnd'], parameters['wrdim']
)

# Lookup tables returned alongside the data.
word_to_id, tag_to_id, word_embeds = (
    mappings[key] for key in ('word_to_id', 'tag_to_id', 'word_embeds')
)

Found 9758 unique words (202057 in total)
Loading pretrained embeddings from wordvectors/glove.6B.300d.txt...
Found 2 unique named entity tags
Loaded 400000 pretrained embeddings.


In [7]:
# Model dimensions taken from the vocabulary and the hyper-parameter table.
word_vocab_size = len(word_to_id)
word_embedding_dim = parameters['wrdim']
word_hidden_dim = parameters['wldim']
output_size = parameters['opsiz']
sigma_prior = parameters['sigmp']

# `dpout` was defined in the hyper-parameter table but never handed to the
# model, so editing it had no effect; it is now wired to `dropout_p`
# (its current value, 0.5, equals the model's default).
model = BiLSTM_BB(word_vocab_size, word_embedding_dim, word_hidden_dim,
                  output_size, sigma_prior=sigma_prior, pretrained=word_embeds,
                  dropout_p=parameters['dpout'])

In [8]:
model_name = 'BiLSTM_BB'

# os.makedirs creates missing intermediate directories, so ensuring the
# model-specific directory also ensures result_path itself.
model_result_dir = os.path.join(result_path, model_name)
if not os.path.exists(model_result_dir):
    os.makedirs(model_result_dir)

In [9]:
# Train on the GPU with Adam, then save the loss curve next to the results.
model.cuda()
learning_rate = parameters['lrate']
num_epochs = parameters['nepch']
print('Initial learning rate is: %s' %(learning_rate))
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Pass the dataset chosen earlier instead of a second hard-coded 'mareview',
# so switching `use_dataset` cannot leave the trainer on the wrong dataset.
trainer = Trainer(model, optimizer, result_path, model_name, tag_to_id, usedataset=use_dataset)
losses, all_F = trainer.train_model(num_epochs, train_data, test_data, learning_rate,
                                    batch_size = parameters['batch_size'])

plt.plot(losses)
plt.savefig(os.path.join(result_path, model_name, 'lossplot.png'))

Initial learning rate is: 0.001




1000 :  92.9076879883
2000 :  70.799197998
3000 :  67.2443574219
4000 :  70.1498837891
5000 :  56.1160869141
6000 :  55.0630898438
7000 :  58.9332850342
8000 :  53.4544295654
9000 :  52.1350965576
********************************************************************************
Accuracy: 0.831495, Best Accuracy: 0.831495
********************************************************************************
********************************************************************************
Accuracy: 0.788425, Best Accuracy: 0.788425
********************************************************************************
********************************************************************************
Saving Best Weights
********************************************************************************
********************************************************************************
Epoch 1 Complete: Time Taken 26


  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


9958 :  57.7330526428
10958 :  37.3931756592
11958 :  42.540442627
12958 :  39.8780645752
13958 :  37.6225488281
14958 :  39.0733669434
15958 :  41.7455275879
16958 :  38.0560771484
17916 :  56.5936414185
18916 :  34.4455311279
********************************************************************************
Accuracy: 0.919130, Best Accuracy: 0.919130
********************************************************************************
********************************************************************************
Accuracy: 0.811195, Best Accuracy: 0.811195
********************************************************************************
********************************************************************************
Saving Best Weights
********************************************************************************
********************************************************************************
Epoch 2 Complete: Time Taken 26
19916 :  23.9550842896
20916 :  25.0934352417
21916 :  21.4783797

KeyboardInterrupt: 