In [None]:
from mxnet.gluon import nn
from mxnet import ndarray as nd
from data_helper.mr_loader import *
from mxnet import gluon, autograd, io
import mxnet as mx
from mxnet.gluon.data import Dataset, DataLoader

import time
from datetime import timedelta

In [None]:
def try_gpu():
    """Return mx.gpu() when a GPU is usable, otherwise fall back to mx.cpu().

    Usability is probed by allocating a one-element NDArray on the GPU
    context; any ordinary error raised while doing so selects the CPU.

    Returns:
        The context object produced by ``mx.gpu()`` or ``mx.cpu()``.
    """
    try:
        ctx = mx.gpu()
        # Probe the device with a tiny allocation before trusting the context.
        _ = nd.array([0], ctx=ctx)
    except Exception:
        # Deliberately broad best-effort fallback. Unlike a bare ``except:``,
        # KeyboardInterrupt and SystemExit are NOT swallowed here.
        ctx = mx.cpu()
    return ctx

In [None]:
# Paths of the corpus input files; they are handed to Corpus(...) later on.
base_dir = 'data/mr'
pos_file = os.path.join(base_dir, 'rt-polarity.pos.txt')
neg_file = os.path.join(base_dir, 'rt-polarity.neg.txt')
vocab_file = os.path.join(base_dir, 'rt-polarity.vocab.txt')


save_path = 'checkpoints'  # model save path
# makedirs(exist_ok=True) replaces the check-then-create pair: it has no
# race between the existence test and the creation, and it also works if
# save_path is later changed to a nested directory.
os.makedirs(save_path, exist_ok=True)
model_file = os.path.join(save_path, 'mr_cnn.params')


def get_time_dif(start_time):
    """
    Elapsed wall-clock time since start_time.

    start_time is a time.time() style timestamp in seconds. The result is a
    timedelta rounded to the nearest whole second.
    """
    elapsed_seconds = time.time() - start_time
    whole_seconds = int(round(elapsed_seconds))
    return timedelta(seconds=whole_seconds)

In [None]:
class Conv_Max_Pooling(nn.Block):
    """
    One convolution branch: Conv1D -> global max pooling -> ReLU -> flatten.

    channels and kernel_size are forwarded unchanged to nn.Conv1D.
    """

    def __init__(self, channels, kernel_size, **kwargs):
        super(Conv_Max_Pooling, self).__init__(**kwargs)

        with self.name_scope():
            self.conv = nn.Conv1D(channels=channels, kernel_size=kernel_size)
            self.pooling = nn.GlobalMaxPool1D()

    def forward(self, x):
        # NOTE(review): x is presumably laid out as (batch, channels, width),
        # as produced by the transpose in TextCNN.forward -- confirm.
        feature_maps = self.conv(x)
        pooled = self.pooling(feature_maps)
        activated = nd.relu(pooled)
        return activated.flatten()

In [None]:
class Config(object):
    """
    Hyper-parameters of the CNN and of its training run
    """
    # --- input representation ---
    vocab_size = 8000  # keep only the most common words
    seq_length = 50  # maximum length of a sequence
    embedding_dim = 128  # size of each embedding vector

    # --- convolution branches ---
    kernel_sizes = [3, 4, 5]  # one kernel (window) size per branch
    num_filters = 100  # convolution filters (feature maps) per branch

    # --- output layer ---
    num_classes = 2  # number of target classes

    # --- optimisation ---
    learning_rate = 0.001  # optimizer learning rate
    dropout_prob = 0.5  # dropout rate
    batch_size = 50  # samples per training batch
    num_epochs = 20  # total number of epochs

    # --- data split ---
    dev_split = 0.1  # fraction of the data held out as dev data

In [None]:
class TextCNN(nn.Block):
    """
    Text classifier: an embedding layer, three parallel Conv_Max_Pooling
    branches (one per entry of config.kernel_sizes[0:3]), dropout and a
    final dense layer with config.num_classes outputs.
    """

    def __init__(self, config, **kwargs):
        super(TextCNN, self).__init__(**kwargs)

        # Read every hyper-parameter up front, before any layer is built.
        vocab_size = config.vocab_size
        embedding_dim = config.embedding_dim
        filters_per_branch = config.num_filters
        kernel_sizes = config.kernel_sizes
        num_classes = config.num_classes
        dropout_prob = config.dropout_prob

        with self.name_scope():
            self.embedding = nn.Embedding(vocab_size, embedding_dim)
            self.conv1 = Conv_Max_Pooling(filters_per_branch, kernel_sizes[0])
            self.conv2 = Conv_Max_Pooling(filters_per_branch, kernel_sizes[1])
            self.conv3 = Conv_Max_Pooling(filters_per_branch, kernel_sizes[2])
            self.dropout = nn.Dropout(dropout_prob)
            self.fc1 = nn.Dense(num_classes)

    def forward(self, x):
        embedded = self.embedding(x)
        # Swap axes 1 and 2 of the embedding output before the convolutions.
        # NOTE(review): presumably (batch, seq, embed) -> (batch, embed, seq);
        # confirm against the loader's output shape.
        conv_input = embedded.transpose((0, 2, 1))

        branches = (self.conv1, self.conv2, self.conv3)
        branch_features = [branch(conv_input) for branch in branches]

        merged = nd.concat(*branch_features)
        return self.fc1(self.dropout(merged))

In [None]:
class MRDataset(Dataset):
    """
    Dataset pairing x[i] with y[i].

    Items are returned as (x[i], y[i]) with both elements cast to float32,
    so the elements of x and y must provide an ``astype`` method.
    """

    def __init__(self, x, y):
        super(MRDataset, self).__init__()
        self.x = x
        self.y = y

    def __getitem__(self, index):
        features = self.x[index].astype(np.float32)
        target = self.y[index].astype(np.float32)
        return features, target

    def __len__(self):
        # The dataset length is defined by x alone.
        return len(self.x)

In [None]:
def evaluate(data_iterator, net, loss, ctx):
    """
    Evaluation, return accuracy and loss

    Args:
        data_iterator: iterable yielding (data, label) batches.
        net: callable applied to each data batch to produce the outputs.
        loss: callable ``loss(output, label)`` whose result is summed to
            accumulate the total loss.
        ctx: context every batch is moved to before the forward pass.

    Returns:
        A tuple ``(accuracy, average_loss)``, where average_loss is the
        summed loss divided by the number of samples seen. If
        data_iterator yields nothing, average_loss is NaN and accuracy is
        whatever mx.metric.Accuracy reports when it has had no updates.
    """
    total_loss, data_len = 0.0, 0
    acc = mx.metric.Accuracy()

    for data, label in data_iterator:
        data, label = data.as_in_context(ctx), label.as_in_context(ctx)

        # Inference only: predict_mode() selects inference behaviour without
        # switching on autograd recording, so no unused graph is built.
        with autograd.predict_mode():
            output = net(data)
            losses = loss(output, label)

        total_loss += nd.sum(losses).asscalar()
        data_len += len(data)
        predictions = nd.argmax(output, axis=1)
        acc.update(preds=predictions, labels=label)

    if data_len == 0:
        # Nothing was evaluated; avoid a ZeroDivisionError on the average.
        return acc.get()[1], float('nan')
    return acc.get()[1], total_loss / data_len

In [None]:
# ---- Load the corpus ----
print("Loading data...")
start_time = time.time()
config = Config()
corpus = Corpus(
    pos_file, neg_file, vocab_file,
    config.dev_split, config.seq_length, config.vocab_size,
)
print(corpus)
# Use the size of the vocabulary that was actually loaded.
config.vocab_size = len(corpus.words)

# ---- Build the model ----
print("Configuring CNN model...")
ctx = try_gpu()
model = TextCNN(config)
model.collect_params().initialize(ctx=ctx)
print("Initializing weights on", ctx)
print(model)

loss = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(
    model.collect_params(),
    'adam',
    {'learning_rate': config.learning_rate},
)

# ---- Batch loaders: shuffled for training, fixed order for the test split ----
train_loader = DataLoader(
    MRDataset(corpus.x_train, corpus.y_train),
    batch_size=config.batch_size,
    shuffle=True,
)
test_loader = DataLoader(
    MRDataset(corpus.x_test, corpus.y_test),
    batch_size=config.batch_size,
    shuffle=False,
)

# ---- Train, evaluating after every epoch ----
print("Training and evaluating...")
msg = ("Epoch {0:3}, Train_loss: {1:>7.2}, Train_acc {2:>6.2%}, "
       "Test_loss: {3:>6.2}, Test_acc {4:>6.2%}, Time: {5} {6}")
best_acc = 0.0
for epoch in range(config.num_epochs):
    for data, label in train_loader:
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        batch_len = len(data)

        with autograd.record(train_mode=True):
            output = model(data)
            losses = loss(output, label)

        losses.backward()
        trainer.step(batch_len)

    train_acc, train_loss = evaluate(train_loader, model, loss, ctx)
    test_acc, test_loss = evaluate(test_loader, model, loss, ctx)

    # '*' marks an epoch that beat the best test accuracy so far; only
    # those epochs overwrite the saved parameters.
    improved_str = '*' if test_acc > best_acc else ''
    if improved_str:
        best_acc = test_acc
        model.save_params(model_file)

    time_dif = get_time_dif(start_time)
    print(msg.format(epoch + 1, train_loss, train_acc, test_loss, test_acc, time_dif, improved_str))

In [None]:
def train():
    """
    Train and evaluate the model with training and validation data.

    Builds the corpus and a TextCNN from a fresh Config, trains for
    config.num_epochs epochs and, after every epoch, reports loss and
    accuracy on the training data and on the held-out data
    (corpus.x_test / corpus.y_test). The parameters are written to
    model_file whenever the held-out accuracy improves.

    The previous body mixed Gluon code with leftovers of a PyTorch
    implementation and referenced names that are defined nowhere in this
    notebook, so it could not run. This version uses only the notebook's
    own components and the same optimizer settings as the top-level
    training cell.

    Returns:
        The TextCNN instance, holding the parameters of the final epoch.
    """
    print('Loading data...')
    start_time = time.time()
    config = Config()
    corpus = Corpus(pos_file, neg_file, vocab_file, config.dev_split, config.seq_length, config.vocab_size)
    print(corpus)
    # Use the size of the vocabulary that was actually loaded.
    config.vocab_size = len(corpus.words)

    print('Configuring CNN model...')
    ctx = try_gpu()
    model = TextCNN(config)
    # Parameters must be initialized before the first forward pass.
    model.collect_params().initialize(ctx=ctx)

    # Named loss_fn so the per-batch loss values never shadow the loss object.
    loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()
    trainer = gluon.Trainer(model.collect_params(), 'adam', {'learning_rate': config.learning_rate})

    train_loader = DataLoader(MRDataset(corpus.x_train, corpus.y_train),
                              batch_size=config.batch_size, shuffle=True)
    val_loader = DataLoader(MRDataset(corpus.x_test, corpus.y_test),
                            batch_size=config.batch_size, shuffle=False)

    print('Training and evaluating...')
    msg = 'Epoch: {0:>3}, Train Loss: {1:>6.2}, Train Acc: {2:>7.2%},' \
          + ' Val Loss: {3:>6.2}, Val Acc: {4:>7.2%}, Time: {5} {6}'
    best_acc_val = 0.0
    for epoch in range(config.num_epochs):
        for data, label in train_loader:
            data, label = data.as_in_context(ctx), label.as_in_context(ctx)

            with autograd.record(train_mode=True):
                output = model(data)
                batch_losses = loss_fn(output, label)

            # Exactly one backward pass and one optimizer step per batch.
            batch_losses.backward()
            trainer.step(len(data))

        acc_train, loss_train = evaluate(train_loader, model, loss_fn, ctx)
        acc_val, loss_val = evaluate(val_loader, model, loss_fn, ctx)

        if acc_val > best_acc_val:
            # store the best validation result
            best_acc_val = acc_val
            improved_str = '*'
            model.save_params(model_file)
        else:
            improved_str = ''

        time_dif = get_time_dif(start_time)
        print(msg.format(epoch + 1, loss_train, acc_train, loss_val, acc_val, time_dif, improved_str))

    print('Best Val Acc: {0:>7.2%}'.format(best_acc_val))
    return model