In [1]:
from sklearn.metrics import classification_report

import tensorflow as tf
import numpy as np


# Hyper-parameters shared by the cells below.
VOCAB_SIZE = 5000    # passed as num_words when loading IMDB and as the vocab size of the embedding
MAX_LEN = 400        # every review is padded to this many tokens (maxlen of pad_sequences)
BATCH_SIZE = 32      # batch size used by the tf.data pipelines
EMBED_DIM = 50       # embedding dimension passed to embed_sequence
FILTERS = 250        # filter count of each conv1d branch; also the width of the hidden dense layer
N_CLASS = 2          # number of units in the final (logits) dense layer
N_EPOCH = 2          # number of passes over the training iterator
DISPLAY_STEP = 50    # training progress is printed every DISPLAY_STEP global steps

In [2]:
# Load the IMDB reviews restricted to VOCAB_SIZE word ids, then pad both
# splits to a common length of MAX_LEN tokens.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.imdb.load_data(
    num_words=VOCAB_SIZE)
X_train, X_test = (
    tf.keras.preprocessing.sequence.pad_sequences(seqs, maxlen=MAX_LEN)
    for seqs in (X_train, X_test)
)

In [3]:
def init_dataset(X, y):
    """Return an initializable iterator over (X, y) pairs in batches.

    Args:
        X: features, sliced along the first axis.
        y: labels, sliced along the first axis in step with ``X``.

    Returns:
        An initializable iterator over the dataset batched by BATCH_SIZE;
        its ``initializer`` op must be run before the first ``get_next()``
        result is evaluated.
    """
    batched = tf.data.Dataset.from_tensor_slices((X, y)).batch(BATCH_SIZE)
    return batched.make_initializable_iterator()

sess = tf.Session()

# Placeholders through which the NumPy arrays enter the graph. Building the
# datasets from these (rather than from the arrays themselves) keeps the data
# out of the GraphDef; the arrays are supplied only when an iterator is
# (re)initialized.
X_tr_ph = tf.placeholder(tf.int32, [None, MAX_LEN])
y_tr_ph = tf.placeholder(tf.int64, [None])
X_te_ph = tf.placeholder(tf.int32, [None, MAX_LEN])
y_te_ph = tf.placeholder(tf.int64, [None])

# The datasets are defined on the placeholders, so the feed_dict passed to each
# iterator initializer below is what actually provides the data.
iter_train = init_dataset(X_tr_ph, y_tr_ph)
iter_test = init_dataset(X_te_ph, y_te_ph)

sess.run(iter_train.initializer, {X_tr_ph: X_train, y_tr_ph: y_train})
sess.run(iter_test.initializer, {X_te_ph: X_test, y_te_ph: y_test})

In [4]:
def forward(x, reuse, is_training):
    """Build the multi-width 1-D CNN classifier and return its logits.

    The input ids are embedded, passed through dropout, convolved by three
    parallel conv1d branches (kernel sizes 3, 4 and 5), each max-pooled over
    its whole time axis, concatenated, and fed through two dense layers.

    Args:
        x: integer tensor of token ids (batches of MAX_LEN-padded reviews in
            this notebook).
        reuse: forwarded to ``tf.variable_scope('model', ...)``; pass False
            for the call that creates the variables and True to share them.
        is_training: forwarded to dropout as its ``training`` flag.

    Returns:
        Logits tensor whose last dimension is N_CLASS.
    """
    with tf.variable_scope('model', reuse=reuse):
        embedded = tf.contrib.layers.embed_sequence(x, VOCAB_SIZE, EMBED_DIM)
        embedded = tf.layers.dropout(embedded, 0.2, training=is_training)

        pooled_branches = []
        for kernel_width in (3, 4, 5):
            conv = tf.layers.conv1d(
                embedded, FILTERS, kernel_width, activation=tf.nn.relu)
            # Pool window equals the full conv output length, i.e. one
            # maximum per filter for each example.
            time_steps = conv.get_shape().as_list()[1]
            pooled = tf.layers.max_pooling1d(conv, time_steps, 1)
            # Drop the length-1 time axis left behind by the pooling.
            pooled = tf.reshape(pooled, (tf.shape(embedded)[0], FILTERS))
            pooled_branches.append(pooled)

        merged = tf.concat(pooled_branches, -1)
        hidden = tf.layers.dense(merged, FILTERS, tf.nn.relu)
        return tf.layers.dense(hidden, N_CLASS)

In [5]:
# One (features, labels) batch tensor pair per iterator.
X_train_batch, y_train_batch = iter_train.get_next()
X_test_batch, y_test_batch = iter_test.get_next()

# The training tower creates the model variables; the test tower reuses them
# with dropout switched off.
logits_train_batch = forward(X_train_batch, reuse=False, is_training=True)
logits_test_batch = forward(X_test_batch, reuse=True, is_training=False)

# Step counter driving the exponentially decayed learning rate.
global_step = tf.Variable(0, trainable=False)
learning_rate = tf.train.exponential_decay(5e-3, global_step, 1400, 0.2)

# Mean sparse softmax cross-entropy over the training batch.
per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
    labels=y_train_batch, logits=logits_train_batch)
mean_loss = tf.reduce_mean(per_example_loss)

# Adam update; minimize() also increments global_step.
train_op = tf.train.AdamOptimizer(learning_rate).minimize(
    mean_loss, global_step=global_step)

# Registry of the tensors/ops that later cells fetch.
ops = {
    'train': {
        'step': global_step,
        'lr': learning_rate,
        'loss': mean_loss,
        'train_op': train_op,
    },
    'test': {
        'logits': logits_test_batch,
    },
}

Instructions for updating:
`NHWC` for data_format is deprecated, use `NWC` instead


In [6]:
# Train for N_EPOCH passes over the training set, logging every DISPLAY_STEP steps.
sess.run(tf.global_variables_initializer())
train_feed = {X_tr_ph: X_train, y_tr_ph: y_train}
for epoch in range(1, N_EPOCH+1):
    # Rewind the training iterator at the start of every epoch (not only
    # between epochs) so this cell can be re-run after it has finished once.
    sess.run(iter_train.initializer, train_feed)
    while True:
        try:
            # Fetch the loss in the same run call as the update. Evaluating the
            # loss in a separate sess.run would pull a fresh batch from the
            # iterator, silently skipping it for training, and could raise
            # OutOfRangeError outside this try block.
            _, loss = sess.run([ops['train']['train_op'], ops['train']['loss']])
        except tf.errors.OutOfRangeError:
            # Iterator exhausted: the epoch is complete.
            break
        # Neither of these depends on the iterator, so reading them separately
        # does not consume data.
        step, lr = sess.run([ops['train']['step'], ops['train']['lr']])
        if step % DISPLAY_STEP == 0 or step == 1:
            print("Epoch %d | Step %d | Loss %.3f | LR: %.4f" % (epoch, step, loss, lr))

Epoch 1 | Step 1 | Loss 0.825 | LR: 0.0050
Epoch 1 | Step 50 | Loss 0.340 | LR: 0.0047
Epoch 1 | Step 100 | Loss 0.383 | LR: 0.0045
Epoch 1 | Step 150 | Loss 0.456 | LR: 0.0042
Epoch 1 | Step 200 | Loss 0.312 | LR: 0.0040
Epoch 1 | Step 250 | Loss 0.299 | LR: 0.0038
Epoch 1 | Step 300 | Loss 0.357 | LR: 0.0035
Epoch 1 | Step 350 | Loss 0.280 | LR: 0.0033
Epoch 1 | Step 400 | Loss 0.153 | LR: 0.0032
Epoch 1 | Step 450 | Loss 0.193 | LR: 0.0030
Epoch 1 | Step 500 | Loss 0.458 | LR: 0.0028
Epoch 1 | Step 550 | Loss 0.412 | LR: 0.0027
Epoch 1 | Step 600 | Loss 0.246 | LR: 0.0025
Epoch 1 | Step 650 | Loss 0.296 | LR: 0.0024
Epoch 1 | Step 700 | Loss 0.177 | LR: 0.0022
Epoch 1 | Step 750 | Loss 0.232 | LR: 0.0021
Epoch 2 | Step 800 | Loss 0.145 | LR: 0.0020
Epoch 2 | Step 850 | Loss 0.211 | LR: 0.0019
Epoch 2 | Step 900 | Loss 0.136 | LR: 0.0018
Epoch 2 | Step 950 | Loss 0.227 | LR: 0.0017
Epoch 2 | Step 1000 | Loss 0.110 | LR: 0.0016
Epoch 2 | Step 1050 | Loss 0.074 | LR: 0.0015
Epoch 2 | S

In [7]:
# Evaluate on the full test set.
# Rewind the test iterator first so the cell does not depend on the iterator
# still being fresh from an earlier cell; without this a second run of the
# cell would collect no batches and np.vstack would fail on an empty list.
sess.run(iter_test.initializer, {X_te_ph: X_test, y_te_ph: y_test})

y_pred_li = []
while True:
    try:
        # Each run yields the logits of the next test batch.
        y_pred_li.append(sess.run(ops['test']['logits']))
    except tf.errors.OutOfRangeError:
        # All test batches have been consumed.
        break

# Stack the per-batch logits and take the highest-scoring class per example.
y_pred = np.argmax(np.vstack(y_pred_li), 1)
print("Accuracy: %.4f" % (y_pred==y_test).mean())
print(classification_report(y_test, y_pred))

Accuracy: 0.8948
             precision    recall  f1-score   support

          0       0.91      0.88      0.89     12500
          1       0.88      0.91      0.90     12500

avg / total       0.90      0.89      0.89     25000

