In [5]:
import json
import os

import chainer
from chainer import training
from chainer import functions as F
from chainer.training import extensions

import nets
import text_datasets
from rawr import Bunch
from nlp_utils import convert_seq
from nlp_utils import convert_snli_seq

# Restore the arguments saved under result/ and wrap them in a Bunch so the
# rest of the notebook can read and assign them as attributes.
with open('result/args.json') as f:
    args = Bunch(json.load(f))

# This notebook works on SNLI, whatever dataset the saved file names.
args.dataset = 'snli'

# Echo the effective settings so they are kept in the cell output.
print(json.dumps(vars(args), indent=2))

{
  "batchsize": 64,
  "epoch": 30,
  "gpu": 0,
  "out": "result/imdb.binary",
  "unit": 300,
  "layer": 1,
  "dropout": 0.4,
  "dataset": "snli",
  "char_based": false,
  "vocab_path": "/fs/clip-ml/shifeng/rawr_transfer/result/vocab.json",
  "n_class": 2,
  "datetime": "2018-02-28 08:09:40.048870"
}


In [35]:
def get_snli(vocab=None, shrink=1, char_based=False):
    """Read the SNLI 'train' and 'dev' splits and encode them with a vocabulary.

    NOTE(review): DATA_DIR, download_snli, read_snli, make_vocab and
    transform_snli_to_array are not imported in the visible cells; they must
    already be in the kernel namespace for this function to run -- confirm
    where they come from.

    Args:
        vocab: Vocabulary handed to transform_snli_to_array. When None, one
            is built with make_vocab from the training premises and
            hypotheses.
        shrink: Forwarded unchanged to read_snli.
        char_based: Forwarded unchanged to read_snli.

    Returns:
        A (train, test, vocab) tuple, where ``test`` is the transformed
        'dev' split.
    """
    snli_root = os.path.join(DATA_DIR, 'snli_1.0')
    if not os.path.exists(snli_root):
        # Nothing on disk yet, so fetch the corpus first.
        download_snli()

    print('read snli')
    raw_train = read_snli(
        snli_root, 'train', shrink=shrink, char_based=char_based)
    raw_dev = read_snli(
        snli_root, 'dev', shrink=shrink, char_based=char_based)

    if vocab is None:
        print('construct vocabulary based on frequency')
        # Each example is a (premise, hypothesis, label) triple; both
        # sentences are paired with the label before being handed to
        # make_vocab, premises first.
        premise_pairs = [
            (premise, label) for premise, _, label in raw_train]
        hypothesis_pairs = [
            (hypothesis, label) for _, hypothesis, label in raw_train]
        vocab = make_vocab(premise_pairs + hypothesis_pairs)

    encoded_train = transform_snli_to_array(raw_train, vocab)
    encoded_dev = transform_snli_to_array(raw_dev, vocab)

    return encoded_train, encoded_dev, vocab

In [36]:
# No vocabulary is passed in, so get_snli builds one from the training split.
train, test, vocab = get_snli(char_based=args.char_based)

# Report the size of each split and of the vocabulary.
print('# train data: {}'.format(len(train)))
print('# test  data: {}'.format(len(test)))
print('# vocab: {}'.format(len(vocab)))

# Count the distinct labels; the third field of each training example is
# read as its integer class id.
n_class = len({int(example[2]) for example in train})
print('# class: {}'.format(n_class))

read snli
construct vocabulary based on frequency
# train data: 550152
# test  data: 10000
# vocab: 20000
# class: 3


In [37]:
# Mini-batch iterators for a quick shape check. The batch size here is a
# hard-coded 20 rather than args.batchsize.
# Training: the iterator's default repeat/shuffle settings are left untouched.
train_iter = chainer.iterators.SerialIterator(train, batch_size=20)
# Evaluation: repeating and shuffling are both switched off.
test_iter = chainer.iterators.SerialIterator(
    test, batch_size=20, repeat=False, shuffle=False)

In [38]:
# Pick up edits to nets.py without restarting the kernel. reload() returns
# the refreshed module object, so rebinding its result is enough; no second
# `import nets` statement is needed.
import importlib
nets = importlib.reload(nets)

# n_class is the label count measured from the training data, not the
# n_class value stored in args.
model = nets.DoubleMaxClassifier(
    n_layers=args.layer,
    n_vocab=len(vocab),
    n_units=args.unit,
    n_class=n_class,
    dropout=args.dropout,
)

# A non-negative args.gpu is used as a device id: make that device current
# and move the model onto it. Negative ids skip this and leave the model
# where it was created.
if args.gpu >= 0:
    chainer.backends.cuda.get_device_from_id(args.gpu).use()
    model.to_gpu()

In [39]:
# Pull one mini-batch from the training iterator and convert it in a single
# step, so `batch` is only ever bound to the converted result.
batch = convert_snli_seq(next(train_iter), device=args.gpu, with_label=True)

In [40]:
# Run the model's predict method on the 'xs' entry of the converted batch.
# NOTE(review): 'xs' presumably holds the encoded input sentences -- confirm
# against convert_snli_seq.
ys = model.predict(batch['xs'])

In [41]:
# Inspect the prediction shape. The recorded output (20, 3) lines up with the
# iterator batch size of 20 and the 3 classes counted in the training data.
ys.shape

(20, 3)