In [1]:
import tensorflow as tf
import numpy as np
from sklearn.metrics import classification_report

In [2]:
# --- Data ---
# Passed as `num_words` when loading IMDB and as the embedding vocabulary size.
VOCAB_SIZE = 5000
# Every review is padded/truncated to this many tokens.
MAX_LEN = 400
# Number of output logits produced by the final dense layer.
N_CLASS = 2

# --- Model ---
# Width of each token embedding.
EMBED_DIM = 50
# Channels per conv1d branch; also the width of the hidden dense layer.
FILTERS = 250

# --- Training ---
BATCH_SIZE = 32
# Number of passes over the training set (dataset `.repeat` count).
N_EPOCH = 2
# Progress is printed every DISPLAY_STEP optimizer steps.
DISPLAY_STEP = 50

In [3]:
# Load IMDB reviews as integer token-id sequences, keeping only the
# VOCAB_SIZE most frequent words.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.imdb.load_data(
    num_words=VOCAB_SIZE)

# Pad/truncate both splits to a fixed length of MAX_LEN tokens.
X_train, X_test = [
    tf.keras.preprocessing.sequence.pad_sequences(split, maxlen=MAX_LEN)
    for split in (X_train, X_test)
]

In [4]:
def pipeline_train(X, y, sess):
    """Build and initialize the shuffled, batched training input iterator.

    The dataset is defined on placeholders and the NumPy arrays are fed only
    when the iterator is initialized, so the data is not embedded in the
    graph as constants.

    Args:
        X: array of token ids fed into an int32 placeholder of shape
            [None, MAX_LEN].
        y: array of labels fed into an int64 placeholder of shape [None].
        sess: tf.Session used to run the iterator initializer.

    Returns:
        An initialized iterator whose `get_next()` yields `(X_batch, y_batch)`
        in batches of up to BATCH_SIZE, for N_EPOCH passes over the data.
    """
    X_ph = tf.placeholder(tf.int32, [None, MAX_LEN])
    y_ph = tf.placeholder(tf.int64, [None])
    # Slice the placeholders (not the raw arrays) so the feed below is what
    # actually supplies the data.
    dataset = tf.data.Dataset.from_tensor_slices((X_ph, y_ph))
    # Shuffle buffer spans the whole training set.
    dataset = dataset.shuffle(len(X)).batch(BATCH_SIZE).repeat(N_EPOCH)
    iterator = dataset.make_initializable_iterator()
    sess.run(iterator.initializer, {X_ph: X, y_ph: y})
    return iterator

def pipeline_test(X, sess):
    """Build and initialize the batched, in-order evaluation input iterator.

    The dataset is defined on a placeholder and the NumPy array is fed only
    when the iterator is initialized, so the data is not embedded in the
    graph as a constant.

    Args:
        X: array of token ids fed into an int32 placeholder of shape
            [None, MAX_LEN].
        sess: tf.Session used to run the iterator initializer.

    Returns:
        An initialized iterator whose `get_next()` yields batches of up to
        BATCH_SIZE rows of X, in order, for a single pass.
    """
    X_ph = tf.placeholder(tf.int32, [None, MAX_LEN])
    # Slice the placeholder (not the raw array) so the feed below is what
    # actually supplies the data.
    dataset = tf.data.Dataset.from_tensor_slices(X_ph)
    dataset = dataset.batch(BATCH_SIZE)
    iterator = dataset.make_initializable_iterator()
    sess.run(iterator.initializer, {X_ph: X})
    return iterator

In [5]:
def forward(x, reuse, is_training):
    """Multi-kernel 1-D CNN text classifier; returns unnormalized class logits.

    Args:
        x: integer tensor of token ids (presumably [batch, MAX_LEN], as
            produced by the input pipelines).
        reuse: passed to the 'model' variable scope; False creates the
            variables, True shares the existing ones.
        is_training: enables dropout on the embeddings when True.

    Returns:
        The output of the final dense layer with N_CLASS units.
    """
    kernel_sizes = (3, 4, 5)
    with tf.variable_scope('model', reuse=reuse):
        embedded = tf.contrib.layers.embed_sequence(x, VOCAB_SIZE, EMBED_DIM)
        embedded = tf.layers.dropout(embedded, 0.2, training=is_training)
        batch_size = tf.shape(embedded)[0]

        pooled = []
        for width in kernel_sizes:
            conv = tf.layers.conv1d(
                embedded, FILTERS, width, activation=tf.nn.relu)
            # Pool over the full remaining time axis (global max-over-time).
            n_steps = conv.get_shape().as_list()[1]
            conv = tf.layers.max_pooling1d(conv, pool_size=n_steps, strides=1)
            # Drop the now length-1 time axis.
            pooled.append(tf.reshape(conv, (batch_size, FILTERS)))

        hidden = tf.layers.dense(tf.concat(pooled, -1), FILTERS, tf.nn.relu)
        return tf.layers.dense(hidden, N_CLASS)

In [6]:
sess = tf.Session()

# Input pipelines: one iterator per split, each already initialized.
iter_train = pipeline_train(X_train, y_train, sess)
iter_test = pipeline_test(X_test, sess)
X_train_batch, y_train_batch = iter_train.get_next()
X_test_batch = iter_test.get_next()

# The training tower creates the variables; the inference tower reuses them
# with dropout disabled.
logits_train_batch = forward(X_train_batch, reuse=False, is_training=True)
ops = {'pred_logits': forward(X_test_batch, reuse=True, is_training=False)}

# Learning rate decays exponentially from 5e-3 by a factor of 0.2 per
# 1400 steps.
ops['global_step'] = tf.Variable(0, trainable=False)
ops['lr'] = tf.train.exponential_decay(
    learning_rate=5e-3,
    global_step=ops['global_step'],
    decay_steps=1400,
    decay_rate=0.2)

# Mean cross-entropy over the training batch.
xent_per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
    labels=y_train_batch, logits=logits_train_batch)
ops['loss'] = tf.reduce_mean(xent_per_example)

# Each run of the train op also increments global_step.
optimizer = tf.train.AdamOptimizer(ops['lr'])
ops['train'] = optimizer.minimize(ops['loss'], global_step=ops['global_step'])

Instructions for updating:
`NHWC` for data_format is deprecated, use `NWC` instead


In [7]:
sess.run(tf.global_variables_initializer())

# Train until the input iterator is exhausted (N_EPOCH passes over the data).
while True:
    try:
        # Fetch the loss in the SAME run as the update. ops['loss'] depends on
        # the training iterator, so evaluating it in a separate sess.run would
        # pull (and discard) an extra batch, report the loss of a batch that
        # is never trained on, and could raise OutOfRangeError outside this
        # try block.
        _, loss = sess.run([ops['train'], ops['loss']])
    except tf.errors.OutOfRangeError:
        break

    # Neither of these depends on the iterator, so no data is consumed here.
    step, lr = sess.run([ops['global_step'], ops['lr']])
    if step % DISPLAY_STEP == 0 or step == 1:
        print("Step %d | Loss %.3f | LR: %.4f" % (step, loss, lr))

Step 1 | Loss 0.909 | LR: 0.0050
Step 50 | Loss 0.585 | LR: 0.0047
Step 100 | Loss 0.515 | LR: 0.0045
Step 150 | Loss 0.378 | LR: 0.0042
Step 200 | Loss 0.204 | LR: 0.0040
Step 250 | Loss 0.421 | LR: 0.0038
Step 300 | Loss 0.309 | LR: 0.0035
Step 350 | Loss 0.374 | LR: 0.0033
Step 400 | Loss 0.201 | LR: 0.0032
Step 450 | Loss 0.421 | LR: 0.0030
Step 500 | Loss 0.263 | LR: 0.0028
Step 550 | Loss 0.271 | LR: 0.0027
Step 600 | Loss 0.129 | LR: 0.0025
Step 650 | Loss 0.238 | LR: 0.0024
Step 700 | Loss 0.218 | LR: 0.0022
Step 750 | Loss 0.380 | LR: 0.0021
Step 800 | Loss 0.100 | LR: 0.0020
Step 850 | Loss 0.058 | LR: 0.0019
Step 900 | Loss 0.090 | LR: 0.0018
Step 950 | Loss 0.165 | LR: 0.0017
Step 1000 | Loss 0.128 | LR: 0.0016
Step 1050 | Loss 0.253 | LR: 0.0015
Step 1100 | Loss 0.233 | LR: 0.0014
Step 1150 | Loss 0.188 | LR: 0.0013
Step 1200 | Loss 0.094 | LR: 0.0013
Step 1250 | Loss 0.124 | LR: 0.0012
Step 1300 | Loss 0.188 | LR: 0.0011
Step 1350 | Loss 0.074 | LR: 0.0011
Step 1400 | Los

In [8]:
# Collect logits batch by batch until the test iterator is exhausted.
y_pred_li = []
try:
    while True:
        y_pred_li.append(sess.run(ops['pred_logits']))
except tf.errors.OutOfRangeError:
    pass

# Stack the per-batch logits and take the highest-scoring class per row.
y_pred = np.concatenate(y_pred_li, axis=0).argmax(axis=1)
print("Accuracy: %.4f" % np.mean(y_pred == y_test))
print(classification_report(y_test, y_pred))

Accuracy: 0.8921
             precision    recall  f1-score   support

          0       0.93      0.84      0.89     12500
          1       0.86      0.94      0.90     12500

avg / total       0.90      0.89      0.89     25000

