In [2]:
# Sentiment analysis with pretrained language model example

In [None]:
# Step 1: Load gluon
import collections

import mxnet as mx
from mxnet import gluon, autograd

In [None]:
#Step 2: Load SA raw data using gluon.data
# Both IMDB segments are read from the same local root directory.
train, test = (gluon.data.text.IMDB(root='data/imdb', segment=segment)
               for segment in ('train', 'test'))

In [None]:
#Step 3: Load user-defined tokenizer and Tokenize SA raw data
import spacy
# Load spaCy's English pipeline once; tokenizer() below reuses it for every review.
# NOTE(review): the 'en' shortcut name is rejected by spaCy v3+, which expects a full
# package name such as 'en_core_web_sm' — confirm against the installed spaCy version.
spacy_en = spacy.load('en')

def tokenizer(text):
    """Return the token strings produced by the spaCy English tokenizer for ``text``.

    Parameters
    ----------
    text : str
        Raw text to split.

    Returns
    -------
    list of str
        The ``.text`` of every token, in order.
    """
    tokens = []
    for tok in spacy_en.tokenizer(text):
        tokens.append(tok.text)
    return tokens

def _tokenize_dataset(dataset):
    """Tokenize every sample of ``dataset`` and split it into two parallel lists.

    Parameters
    ----------
    dataset : iterable of (text, score) pairs
        Raw samples as yielded by the IMDB dataset objects loaded above.

    Returns
    -------
    (list of list of str, list)
        The tokenized texts and their scores, in dataset order. Both lists are
        empty when ``dataset`` is empty.
    """
    tokenized, labels = [], []
    for raw_text, score in dataset:
        tokenized.append(tokenizer(raw_text))
        labels.append(score)
    return tokenized, labels


# The previous one-line form (`[tokenizer(text), score for ...]`) was not valid Python:
# an unparenthesized tuple cannot be the element expression of a comprehension.
train_tokenized, train_labels = _tokenize_dataset(train)
test_tokenized, test_labels = _tokenize_dataset(test)

In [None]:
#Step 4: Encode the tokenized data as NDArray instances, using a vocabulary built from the LM's training data
from mxnet.gluon import text #issue here
def get_frequencies(dataset):
    """Count token occurrences over a dataset of (data, target) samples.

    For every sample, all tokens of ``sample[0]`` are counted plus only the
    last token of ``sample[1]``.

    Parameters
    ----------
    dataset : iterable of sequences
        Each sample must provide a token sequence at index 0 and index 1.
        # presumably target is data shifted by one token, so its last element
        # is the only token not already in data — verify against the dataset.

    Returns
    -------
    collections.Counter
        Mapping token -> number of occurrences; empty for an empty dataset.
    """
    counter = collections.Counter()
    for sample in dataset:
        # Update separately instead of concatenating: avoids a temporary list per
        # sample and works when data and target are different sequence types.
        counter.update(sample[0])
        counter.update(sample[1][-1:])
    return counter
# Count tokens over the WikiText2 training segment and build the vocabulary from those
# counts, so token indices follow the language model's training data rather than IMDB.
# NOTE(review): the `text` import above is flagged "#issue here" — confirm that
# mxnet.gluon.text is available in the installed MXNet build.
lm_train_dataset = gluon.data.text.WikiText2(segment='train')
vocab = text.vocab.Vocabulary(get_frequencies(lm_train_dataset))

def encode_sentences(x_raw_samples, vocab):
    """Convert tokenized sentences into lists of vocabulary indices.

    Parameters
    ----------
    x_raw_samples : iterable of sequence of str
        One token sequence per sentence.
    vocab : object
        Vocabulary used for the lookup.
        # NOTE(review): assumes ``vocab`` exposes ``to_indices(list_of_tokens)``
        # returning a list of ints (as mxnet.contrib.text.vocab.Vocabulary does)
        # — confirm for the vocabulary class actually used here.

    Returns
    -------
    list of list of int
        One index list per input sentence, in input order.
    """
    x_encoded_samples = []
    for tokens in x_raw_samples:
        # Always pass a list so a one-token sentence still yields a list of indices.
        x_encoded_samples.append(list(vocab.to_indices(list(tokens))))
    return x_encoded_samples
    
def encode_labels(y_raw_samples, threshold=5):
    """Convert raw review scores into binary sentiment labels.

    Parameters
    ----------
    y_raw_samples : iterable of numbers
        Raw score of each review.
    threshold : number, default 5
        Scores strictly greater than this become label 1, all others label 0.
        # NOTE(review): assumes scores are ratings on a 1-10 scale, so 5 splits
        # negative from positive reviews — confirm against the dataset.

    Returns
    -------
    list of int
        0/1 labels in input order.
    """
    y_encoded_samples = [1 if score > threshold else 0 for score in y_raw_samples]
    return y_encoded_samples

def pad_sample(x_encoded_samples, maxlen = 500, val = 0):
    """Force every encoded sample to exactly ``maxlen`` entries.

    Samples longer than ``maxlen`` are truncated at the end; shorter ones are
    padded at the end with ``val``.
    # NOTE(review): right-side padding/truncation is a design choice made here;
    # the original stub did not specify a side — confirm it suits the model.

    Parameters
    ----------
    x_encoded_samples : iterable of sequence
        Encoded samples of arbitrary length.
    maxlen : int, default 500
        Target length of every returned row.
    val : default 0
        Fill value appended to short samples.

    Returns
    -------
    list of list
        Rectangular data: one row of length ``maxlen`` per input sample.

    Raises
    ------
    ValueError
        If ``maxlen`` is negative.
    """
    if maxlen < 0:
        raise ValueError('maxlen must be non-negative, got %s' % maxlen)
    x_samples = []
    for sample in x_encoded_samples:
        row = list(sample[:maxlen])
        row.extend([val] * (maxlen - len(row)))
        x_samples.append(row)
    return x_samples
    
# Training split: tokens -> vocabulary indices -> fixed-length rows -> NDArray.
x_encoded_train = encode_sentences(train_tokenized, vocab)
x_train = mx.nd.array(pad_sample(x_encoded_train, maxlen=500, val=0))
y_train = mx.nd.array(encode_labels(train_labels))

# Test split, processed exactly like the training split.
x_encoded_test = encode_sentences(test_tokenized, vocab)
x_test = mx.nd.array(pad_sample(x_encoded_test, maxlen=500, val=0))
y_test = mx.nd.array(encode_labels(test_labels))

In [None]:
#Step 5: Build SA classifier: pretrained lm encoder's hidden state as feature + binary dense layer as decoder
from mxnet.gluon.model_zoo.text.lm import AWDLSTM

class SALSTM(AWDLSTM):
    """Sentiment classifier that reuses the AWD-LSTM embedding and encoder.

    The embedding and encoder are obtained from the parent's ``_get_embedding``
    and ``_get_encoder`` helpers; the output layer is a ``Dense(nclass)``
    classification head.

    Parameters
    ----------
    mode, vocab, embed_dim, hidden_dim, num_layers
        Forwarded positionally to ``AWDLSTM.__init__``.
    dropout, drop_h, drop_i, drop_e, weight_drop, tie_weights
        Forwarded by keyword to ``AWDLSTM.__init__``.
    nclass : int, default 2
        Number of output classes of the dense head. It has a default because a
        parameter without one cannot follow defaulted parameters; it is still
        accepted positionally in the same slot as before.
    **kwargs
        Extra keyword arguments forwarded to the parent constructor.
    """
    def __init__(self, mode, vocab, embed_dim, hidden_dim, num_layers,
                 dropout=0.5, drop_h=0.5, drop_i=0.5, drop_e=0.1, weight_drop=0,
                 tie_weights=False, nclass=2, **kwargs):
        # `self` is bound by super() and must not be passed again; the caller's
        # settings (not the signature defaults) are what the parent must receive.
        super(SALSTM, self).__init__(mode, vocab, embed_dim, hidden_dim, num_layers,
                                     dropout=dropout, drop_h=drop_h, drop_i=drop_i,
                                     drop_e=drop_e, weight_drop=weight_drop,
                                     tie_weights=tie_weights, **kwargs)
        # presumably read by _get_embedding/_get_encoder below and possibly already
        # set by the parent constructor — verify against AWDLSTM.
        self._mode = mode
        self._embed_dim = embed_dim
        self._hidden_dim = hidden_dim
        self._num_layers = num_layers
        self._dropout = dropout
        self._drop_h = drop_h
        self._drop_i = drop_i
        self._drop_e = drop_e
        self._weight_drop = weight_drop
        self._tie_weights = tie_weights
        with self.name_scope():
            # Gluon blocks register child blocks on attribute assignment, so no
            # explicit add() calls are needed (add() is the Sequential container API).
            # NOTE(review): assumes AWDLSTM derives from gluon.Block — confirm.
            self.embedding = self._get_embedding()
            self.encoder = self._get_encoder()
            self.decoder = gluon.nn.Dense(nclass)

    def begin_state(self, *args, **kwargs):
        """Return the initial state produced by the first encoder element."""
        return self.encoder[0].begin_state(*args, **kwargs)

    def forward(self, inputs, begin_state=None): # pylint: disable=arguments-differ
        """Embed ``inputs``, run the encoder and classify from its state.

        Parameters
        ----------
        inputs
            Token-index input passed to the embedding layer.
        begin_state : optional
            Initial encoder state; when falsy, ``self.begin_state()`` is used.

        Returns
        -------
        tuple
            ``(out, state)``: the dense-head output and the encoder state.
        """
        embedded_inputs = self.embedding(inputs)
        if not begin_state:
            # NOTE(review): called without arguments; Gluon recurrent layers usually
            # take a batch_size here — confirm the default fits the input.
            begin_state = self.begin_state()
        encoded, state = self.encoder(embedded_inputs, begin_state)
        # NOTE(review): `state` is fed to the dense head as-is; if the encoder returns
        # a list of state arrays, a single array must be selected first — confirm.
        out = self.decoder(state)
        return out, state

In [None]:
#Step 6: Report evaluation results: train and test accuracy
def eval(x_samples, y_samples):
    """Placeholder for the accuracy computation; currently always returns 0.

    Neither ``x_samples`` nor ``y_samples`` is read yet, so the accuracies
    printed by the training loop are 0 until this is implemented.

    NOTE(review): this definition shadows Python's built-in ``eval`` for the
    rest of the notebook.
    """
    #TODO
    return 0

In [None]:
#Step 7: Train SA model and evaluate on train and test set
# Device used for the model parameters and for every training sample.
# NOTE(review): hard-coded to the first GPU — presumably needs mx.cpu() on a
# machine without one; confirm the target environment.
context = mx.gpu(0)

## Pretrained LM parameter set (passed to the SALSTM constructor below)
lm_mode = 'lstm'
lm_emsize = 400
lm_nhid = 1150
lm_nlayers = 3
lm_dropout = 0.4
lm_dropout_h = 0.3
lm_dropout_i = 0.4
lm_dropout_e = 0.1
lm_weight_dropout = 0.65
lm_tied = True
## SA parameter set: number of output classes of the dense head
nclass = 2
## Hyper-parameters: SGD learning rate and number of passes over x_train
lr = 0.01
epochs = 10
## Value handed to trainer.step() in the training loop
batch_size = 1


# Build the classifier from the configuration above; every setting is passed by
# name so each value is visibly tied to the constructor parameter it feeds.
model = SALSTM(mode=lm_mode, vocab=vocab, embed_dim=lm_emsize, hidden_dim=lm_nhid,
               num_layers=lm_nlayers, dropout=lm_dropout, drop_h=lm_dropout_h,
               drop_i=lm_dropout_i, drop_e=lm_dropout_e,
               weight_drop=lm_weight_dropout, tie_weights=lm_tied, nclass=nclass)

# Xavier-initialise all parameters on the chosen device.
model.initialize(mx.init.Xavier(), ctx=context)

# Plain SGD over every model parameter, with softmax cross-entropy as the objective.
trainer = gluon.Trainer(model.collect_params(), 'sgd', {'learning_rate': lr})
loss = gluon.loss.SoftmaxCrossEntropyLoss()

# One parameter update per training sample; accuracies are reported after every epoch.
for epoch in range(epochs):
    for i, data in enumerate(x_train):
        data = data.as_in_context(context)
        target = y_train[i].as_in_context(context)
        # NOTE(review): each `data` is a single row of x_train; confirm whether the
        # encoder needs an explicit batch axis added before the forward pass.
        with autograd.record():
            # The model's forward returns (class scores, encoder state); only the
            # scores belong in the loss, so the tuple is unpacked first.
            output, _ = model(data)
            L = loss(output, target)
        L.backward()
        trainer.step(batch_size)
    train_accuracy = eval(x_train, y_train)
    test_accuracy = eval(x_test, y_test)
    print("Epoch %s. Train_acc %s, Test_acc %s"%(epoch, train_accuracy, test_accuracy))