In [1]:
import collections
import numpy as np

import chainer
from chainer import cuda
import chainer.functions as F
import chainer.links as L
from chainer.training import extensions
from chainer import reporter


chainer.print_runtime_info()

Platform: Linux-4.15.0-66-generic-x86_64-with-Ubuntu-16.04-xenial
Chainer: 6.5.0
ChainerX: Not Available
NumPy: 1.17.3
CuPy:
  CuPy Version          : 6.5.0
  CUDA Root             : /usr/local/cuda-10.0
  CUDA Build Version    : 10000
  CUDA Driver Version   : 10010
  CUDA Runtime Version  : 10000
  cuDNN Build Version   : 7603
  cuDNN Version         : 7603
  NCCL Build Version    : 2402
  NCCL Runtime Version  : 2402
iDeep: Not Available


In [3]:
######### only once. ########
import os.path
from six.moves.urllib import request
import zipfile


# Fetch the treebank archive only when it is not already on disk, so that
# re-running this cell does not hit the network again.
# NOTE: an interrupted earlier download leaves a truncated archive behind;
# delete the file to force a fresh download.
archive_path = 'trainDevTestTrees_PTB.zip'
if not os.path.exists(archive_path):
    request.urlretrieve(
        'https://nlp.stanford.edu/sentiment/trainDevTestTrees_PTB.zip',
        archive_path)

# The context manager closes the archive handle even if extraction fails.
with zipfile.ZipFile(archive_path) as zf:
    for name in zf.namelist():
        # Entries whose final path component is empty are directories;
        # only real files are extracted (into the current directory).
        if os.path.basename(name) != '':
            zf.extract(name, '.')

In [2]:
!ls trees

dev.txt  test.txt  train.txt


In [3]:
!head trees/dev.txt -n1

(3 (2 It) (4 (4 (2 's) (4 (3 (2 a) (4 (3 lovely) (2 film))) (3 (2 with) (4 (3 (3 lovely) (2 performances)) (2 (2 by) (2 (2 (2 Buy) (2 and)) (2 Accorsi))))))) (2 .)))


In [4]:
# parameters
n_epoch = 100  # number of epochs
n_units = 30  # number of units per layer
batchsize = 25  # minibatch size
n_label = 5  # number of labels
epoch_per_eval = 5  # number of epochs per evaluation
is_test = True  # quick smoke-test mode: cap the amount of data loaded
gpu_id = 0

# Upper bound on the number of trees read from each corpus file;
# None means no limit.
max_size = 10 if is_test else None

In [5]:
import codecs
import re


class SexpParser(object):
    """Recursive-descent parser for one parenthesised S-expression.

    The input line is split into three kinds of tokens: ``(``, ``)`` and
    runs of characters that are neither a parenthesis nor a space.
    ``pos`` is the index of the next token to consume.
    """

    def __init__(self, line):
        self.tokens = re.findall(r'\(|\)|[^\(\) ]+', line)
        self.pos = 0

    def parse(self):
        """Consume one expression starting at ``self.pos`` and return it.

        Returns:
            The token string for an atom, or a (possibly nested) list of
            parsed children for a parenthesised group.

        Raises:
            AssertionError: if the tokens run out, or a ``)`` appears where
                an expression is expected.
        """
        assert self.pos < len(self.tokens)
        head = self.tokens[self.pos]
        assert head != ')'
        self.pos += 1

        # Atom: the token itself is the value.
        if head != '(':
            return head

        # Group: collect sub-expressions until the matching ')'.
        items = []
        while True:
            assert self.pos < len(self.tokens)
            if self.tokens[self.pos] == ')':
                break
            items.append(self.parse())
        self.pos += 1  # step over the closing ')'
        return items


def read_corpus(path, max_size):
    """Read S-expression trees from a UTF-8 text file, one tree per line.

    Args:
        path: Path of the corpus file.
        max_size: Stop after this many trees have been read. A falsy value
            (``None`` or ``0``) means read the whole file.

    Returns:
        A list with the result of ``SexpParser(line).parse()`` for every
        non-blank line, in file order.
    """
    trees = []
    with codecs.open(path, encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                # A blank line (e.g. a trailing empty line) holds no tree;
                # handing it to the parser would fail its first assertion.
                continue
            trees.append(SexpParser(line).parse())
            if max_size and len(trees) >= max_size:
                break

    return trees


def convert_tree(vocab, exp):
    """Turn a parsed tree into nested dicts, assigning word ids on the fly.

    Args:
        vocab: dict mapping word -> integer id. Words not yet present are
            added in place, with the current dict size as the new id.
        exp: A list of the form ``[label, word]`` (leaf) or
            ``[label, left, right]`` (internal node).

    Returns:
        ``{'label': int, 'node': word_id}`` for a leaf, or
        ``{'label': int, 'node': (left_dict, right_dict)}`` for an
        internal node.
    """
    assert isinstance(exp, list) and len(exp) in (2, 3)

    if len(exp) == 3:
        tag, lhs, rhs = exp
        # Left subtree first, so word ids are handed out in reading order.
        left_child = convert_tree(vocab, lhs)
        right_child = convert_tree(vocab, rhs)
        return {'label': int(tag), 'node': (left_child, right_child)}

    tag, word = exp
    word_id = vocab.setdefault(word, len(vocab))
    return {'label': int(tag), 'node': word_id}

In [6]:

# A single vocabulary is shared by all three splits; convert_tree adds
# every word it has not seen before, so it must be filled in this order.
vocab = {}

train_trees = read_corpus('trees/train.txt', max_size)
train_data = [convert_tree(vocab, t) for t in train_trees]
train_iter = chainer.iterators.SerialIterator(train_data, batchsize)

validation_trees = read_corpus('trees/dev.txt', max_size)
validation_data = [convert_tree(vocab, t) for t in validation_trees]
# Single, unshuffled pass over the validation data.
validation_iter = chainer.iterators.SerialIterator(
    validation_data, batchsize, repeat=False, shuffle=False)

test_trees = read_corpus('trees/test.txt', max_size)
test_data = [convert_tree(vocab, t) for t in test_trees]

In [7]:
print(test_data[0])

{'label': 2, 'node': ({'label': 3, 'node': ({'label': 3, 'node': 252}, {'label': 2, 'node': 71})}, {'label': 1, 'node': ({'label': 1, 'node': 253}, {'label': 2, 'node': 254})})}


In [8]:
class RecursiveNet(chainer.Chain):
    """Recursive neural network over binary trees.

    Every node is mapped to a vector: leaves through a word embedding,
    internal nodes by combining the vectors of their two children. A
    linear layer maps each node vector to label scores.

    Trees are nested dicts with keys ``'label'`` (int) and ``'node'``
    (an int word id for a leaf, or a ``(left, right)`` pair of sub-trees).
    """

    def __init__(self, n_vocab, n_units):
        """Create the links.

        Args:
            n_vocab: Number of rows of the embedding table.
            n_units: Size of each node vector.

        The output size of the label layer is read from the module-level
        ``n_label``.
        """
        super(RecursiveNet, self).__init__()
        with self.init_scope():
            self.embed = L.EmbedID(n_vocab, n_units)
            self.l = L.Linear(n_units * 2, n_units)
            self.w = L.Linear(n_units, n_label)

    def leaf(self, x):
        """Embed the word id array ``x``."""
        return self.embed(x)

    def node(self, left, right):
        """Combine two child vectors: tanh of a linear map of their concat."""
        return F.tanh(self.l(F.concat((left, right))))

    def label(self, v):
        """Map a node vector to label scores."""
        return self.w(v)

    def traverse(self, node, evaluate=None, root=True):
        """Compute the summed loss and the vector of the sub-tree at ``node``.

        Args:
            node: Tree dict (see class docstring).
            evaluate: Optional mapping used as a counter. When given,
                ``'correct_node'`` / ``'total_node'`` are updated for every
                visited node and ``'correct_root'`` / ``'total_root'`` for
                the root. It must yield 0 for missing keys (e.g. a
                ``defaultdict``). When ``None``, no statistics are kept.
            root: True only for the outermost call; recursive calls pass
                False.

        Returns:
            ``(loss, v)``: the softmax cross entropy summed over this node
            and all its descendants, and this node's vector.
        """
        if isinstance(node['node'], int):
            # leaf node
            word = self.xp.array([node['node']], np.int32)
            loss = 0
            v = self.leaf(word)
        else:
            # internal node
            left_node, right_node = node['node']
            left_loss, left = self.traverse(
                left_node, evaluate=evaluate, root=False)
            right_loss, right = self.traverse(
                right_node, evaluate=evaluate, root=False)
            v = self.node(left, right)
            loss = left_loss + right_loss

        y = self.label(v)

        label = self.xp.array([node['label']], np.int32)
        t = chainer.Variable(label)
        loss += F.softmax_cross_entropy(y, t)

        # Statistics are optional: skip them when no counter was supplied
        # instead of failing on ``None[...]``.
        if evaluate is not None:
            predict = cuda.to_cpu(y.data.argmax(1))
            hit = predict[0] == node['label']
            if hit:
                evaluate['correct_node'] += 1
            evaluate['total_node'] += 1

            if root:
                if hit:
                    evaluate['correct_root'] += 1
                evaluate['total_root'] += 1

        return loss, v

    def __call__(self, x):
        """Return the loss summed over the trees in ``x``.

        Also reports ``loss``, ``total`` (visited nodes) and ``correct``
        (correctly labelled nodes) through ``chainer.reporter``.
        """
        accum_loss = 0.0
        result = collections.defaultdict(lambda: 0)
        for tree in x:
            loss, _ = self.traverse(tree, evaluate=result)
            accum_loss += loss

        reporter.report({'loss': accum_loss}, self)
        reporter.report({'total': result['total_node']}, self)
        reporter.report({'correct': result['correct_node']}, self)
        return accum_loss

In [9]:
model = RecursiveNet(len(vocab), n_units)

if gpu_id >= 0:
    # Make gpu_id the current device before moving the model, so the
    # parameters and the arrays the model creates through self.xp end up
    # on the GPU that was actually requested.
    cuda.get_device_from_id(gpu_id).use()
    model.to_gpu()

# Setup optimizer
optimizer = chainer.optimizers.AdaGrad(lr=0.1)
optimizer.setup(model)
optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(0.0001))

In [10]:
def _convert(batch, device):
  return batch

# Updater and trainer: run for n_epoch epochs.
updater = chainer.training.StandardUpdater(
    train_iter, optimizer, converter=_convert, device=gpu_id)
trainer = chainer.training.Trainer(updater, stop_trigger=(n_epoch, 'epoch'))

# Validation pass every epoch_per_eval epochs.
evaluator = extensions.Evaluator(
    validation_iter, model, converter=_convert, device=gpu_id)
trainer.extend(evaluator, trigger=(epoch_per_eval, 'epoch'))
trainer.extend(extensions.LogReport())

# accuracy = correct / total, for the training and validation reports.
for prefix in ('main', 'validation/main'):
    trainer.extend(extensions.MicroAverage(
        prefix + '/correct', prefix + '/total', prefix + '/accuracy'))

report_columns = [
    'epoch', 'main/loss', 'validation/main/loss',
    'main/accuracy', 'validation/main/accuracy', 'elapsed_time',
]
trainer.extend(extensions.PrintReport(report_columns))
trainer.run()

epoch       main/loss   validation/main/loss  main/accuracy  validation/main/accuracy  elapsed_time
[J2           2135.57                           0.154585                                 7.00102       
[J5           1316.21     573.718               0.55907        0.415301                  9.32382       
[J7           904.587                           0.730594                                 11.3562       
[J10          1269.65     525.77                0.719111       0.453552                  13.6946       
[J12          909.97                            0.76884                                  15.8759       
[J15          522.208     560.91                0.869105       0.461749                  18.0683       
[J17          421.505                           0.921778                                 20.0697       
[J20          301.606     596.515               0.957991       0.486339                  22.3617       
[J22          252.883                           0.961574   

In [11]:
def evaluate(model, test_trees):
    """Print node-level and root-level accuracy of ``model`` on ``test_trees``.

    Each tree is passed to ``model.traverse`` with a shared counter, in
    test mode (``train`` config off) and without building a backprop graph.

    Args:
        model: Object with a ``traverse(tree, evaluate=...)`` method that
            updates the ``correct_*`` / ``total_*`` counters.
        test_trees: Iterable of trees to score.
    """
    result = collections.defaultdict(lambda: 0)
    with chainer.using_config('train', False), chainer.no_backprop_mode():
        for tree in test_trees:
            model.traverse(tree, evaluate=result)

    for title, kind in (('Node', 'node'), ('Root', 'root')):
        correct = result['correct_' + kind]
        total = result['total_' + kind]
        # With no trees there is nothing to divide by; report 0 %.
        acc = 100.0 * correct / total if total else 0.0
        # str.format needs no escaping for '%'; '%%' would print both
        # characters literally.
        print(' {0} accuracy: {1:.2f} % ({2:,d}/{3:,d})'.format(
            title, acc, correct, total))

print('Test evaluation')
evaluate(model, test_data)

Test evaluation
 Node accuracy: 51.60 %% (161/312)
 Root accuracy: 40.00 %% (4/10)
