In [1]:
import time
import joblib as jl
import tensorflow as tf
import tensorlayer as tl

In [10]:
# Dataset selection. Both statements below execute in order, so the second
# assignment rebinds x_train / y_train / x_val / y_val; after this cell only
# x_test / y_test still hold what the MNIST loader returned.
# To train on MNIST instead, comment out the second load.
# use mnist
x_train, y_train, x_val, y_val, x_test, y_test = tl.files.load_mnist_dataset(shape=(-1, 784))
# use 20newsgroups
# NOTE(review): joblib.load is pickle-based and can execute code embedded in
# the file -- only load 'data.jl.z' if it comes from a trusted source.
# The file is expected to unpack into exactly four objects (no test split).
x_train, y_train, x_val, y_val = jl.load('data.jl.z')

In [3]:
def trainning_framework(x_train, y_train, x_val, y_val, model,
                        n_epoch=20, batch_size=12, learning_rate=0.0001, print_freq=5):
    """Build a model in a fresh default graph and train it with Adam.

    Args:
        x_train, y_train: training inputs / labels, forwarded to ``train_net``.
        x_val, y_val: validation inputs / labels, forwarded to ``train_net``.
        model: zero-argument callable returning the 6-tuple
            ``(x, y_, y, cost, acc, net)``.
        n_epoch: number of passes over the training data.
        batch_size: mini-batch size used for training and evaluation.
        learning_rate: learning rate handed to ``tf.train.AdamOptimizer``.
        print_freq: metrics are reported on epoch 1 and every ``print_freq`` epochs.

    Returns:
        None. Progress is printed by ``train_net``.

    Note:
        The session created here is closed before returning (also when
        training raises), so trained variable values are not reachable
        afterwards.
    """
    # Start from an empty graph so repeated calls from a notebook do not
    # collide on variable / layer names.
    tf.reset_default_graph()
    sess = tf.InteractiveSession()
    try:
        x, y_, y, cost, acc, net = model()
        train_op = tf.train.AdamOptimizer(learning_rate).minimize(cost)

        tl.layers.initialize_global_variables(sess)
        net.print_params()
        net.print_layers()

        train_net(sess, train_op, x, y, y_, cost, acc, net, x_train, y_train, x_val, y_val,
                  print_freq, batch_size, n_epoch)
    finally:
        # The session is local and never returned, so without an explicit
        # close every call would leave another interactive session open.
        sess.close()
    

    
def train_net(sess, train_op, x, y, y_, cost, acc, net, x_train, y_train, x_val, y_val,
              print_freq=5, batch_size=128, n_epoch=200):
    """Run mini-batch training and periodically print train / validation metrics.

    Args:
        sess: session used to run ``train_op`` and to evaluate ``cost`` / ``acc``.
        train_op: optimisation op executed once per training mini-batch.
        x: input placeholder fed with the mini-batch inputs.
        y: network output tensor (accepted for interface compatibility; unused here).
        y_: label placeholder fed with the mini-batch targets.
        cost: loss tensor evaluated for reporting.
        acc: accuracy tensor evaluated for reporting.
        net: network object; ``net.all_drop`` is merged into the training feed
            and converted with ``tl.utils.dict_to_one`` for evaluation.
        x_train, y_train: training inputs / labels.
        x_val, y_val: validation inputs / labels.
        print_freq: report on epoch 1 and on every ``print_freq``-th epoch.
        batch_size: mini-batch size for both training and evaluation.
        n_epoch: number of training epochs.

    Returns:
        None. All results are printed.
    """
    for epoch in range(n_epoch):
        start_time = time.time()
        for x_train_a, y_train_a in tl.iterate.minibatches(x_train, y_train, batch_size, shuffle=True):
            feed_dict = {x: x_train_a, y_: y_train_a}
            feed_dict.update(net.all_drop)  # enable dropout or dropconnect layers
            sess.run(train_op, feed_dict=feed_dict)

        if epoch + 1 == 1 or (epoch + 1) % print_freq == 0:
            print("Epoch %d of %d took %fs" % (epoch + 1, n_epoch, time.time() - start_time))
            for label, inputs, targets in (("train", x_train, y_train), ("val", x_val, y_val)):
                metrics = _mean_cost_and_acc(sess, x, y_, cost, acc, net, inputs, targets, batch_size)
                if metrics is None:
                    # Previously this case divided by zero and aborted training.
                    print("   %s: no mini-batch produced (batch_size=%d); metrics skipped"
                          % (label, batch_size))
                    continue
                mean_loss, mean_acc = metrics
                print("   %s loss: %f" % (label, mean_loss))
                print("   %s acc: %f" % (label, mean_acc))


def _mean_cost_and_acc(sess, x, y_, cost, acc, net, inputs, targets, batch_size):
    """Average ``cost`` and ``acc`` over mini-batches with noise layers disabled.

    Returns a ``(mean_cost, mean_acc)`` tuple, or ``None`` when the mini-batch
    iterator yields nothing.
    """
    # Loop-invariant, so built once instead of once per batch.
    dp_dict = tl.utils.dict_to_one(net.all_drop)  # disable noise layers
    total_loss, total_acc, n_batch = 0, 0, 0
    # shuffle=True is kept from the original code; it does not affect the
    # averages but keeps the random-number consumption unchanged.
    for x_a, y_a in tl.iterate.minibatches(inputs, targets, batch_size, shuffle=True):
        feed_dict = {x: x_a, y_: y_a}
        feed_dict.update(dp_dict)
        err, ac = sess.run([cost, acc], feed_dict=feed_dict)
        total_loss += err
        total_acc += ac
        n_batch += 1
    if n_batch == 0:
        return None
    return total_loss / n_batch, total_acc / n_batch

In [15]:
def fc_model():
    """Assemble a fully connected classifier graph on 784-feature inputs.

    Layer stack: input -> dropout(0.8) -> dense 800 relu -> dropout(0.5)
    -> dense 800 relu -> dropout(0.5) -> dense 10 (identity).

    Returns:
        Tuple ``(x, y_, y, cost, acc, net)``: the float32 input placeholder,
        the int64 label placeholder, the output tensor of the last layer,
        the ``tl.cost.cross_entropy`` cost, the mean-accuracy tensor and the
        final TensorLayer layer object.
    """
    inputs = tf.placeholder(tf.float32, shape=[None, 784], name='x')
    labels = tf.placeholder(tf.int64, shape=[None], name='y_')

    network = tl.layers.InputLayer(inputs, name='input')

    # Each hidden block is a dropout layer followed by an 800-unit relu layer.
    hidden_blocks = (
        (0.8, 'drop1', 'relu1'),
        (0.5, 'drop2', 'relu2'),
    )
    for keep_prob, drop_name, dense_name in hidden_blocks:
        network = tl.layers.DropoutLayer(network, keep=keep_prob, name=drop_name)
        network = tl.layers.DenseLayer(network, n_units=800, act=tf.nn.relu, name=dense_name)

    # Output head: one more dropout, then 10 units with identity activation.
    network = tl.layers.DropoutLayer(network, keep=0.5, name='drop3')
    network = tl.layers.DenseLayer(network, n_units=10, act=tf.identity, name='output')

    logits = network.outputs

    loss = tl.cost.cross_entropy(logits, labels, name='xentropy')
    is_hit = tf.equal(tf.argmax(logits, 1), labels)
    accuracy = tf.reduce_mean(tf.cast(is_hit, tf.float32))

    return inputs, labels, logits, loss, accuracy, network

In [22]:
def cnn_model():
    """Assemble a two-stage convolutional classifier graph on 784-feature inputs.

    The flat input is reshaped to ``[-1, 28, 28, 1]``, passed through two
    (5x5 conv + relu, 2x2 max-pool) stages, flattened, and fed to a 128-unit
    dense layer, dropout(0.5) and a 10-unit output layer.

    Returns:
        Tuple ``(x, y_, y, cost, acc, net)``: the float32 input placeholder,
        the int64 label placeholder, the output tensor of the last layer,
        the ``tl.cost.cross_entropy`` cost, the mean-accuracy tensor and the
        final TensorLayer layer object.
    """
    inputs = tf.placeholder(tf.float32, shape=[None, 784], name='x')
    labels = tf.placeholder(tf.int64, shape=[None], name='y_')

    network = tl.layers.InputLayer(inputs, name='input')
    network = tl.layers.ReshapeLayer(network, [-1, 28, 28, 1], name='reshape')

    # (filter shape, conv layer name, pool layer name) for each stage.
    # With SAME padding and stride 1 the conv keeps the spatial size; each
    # 2x2 / stride-2 pool halves it: 28x28x32 -> 14x14x32 -> 14x14x64 -> 7x7x64.
    conv_stages = (
        ([5, 5, 1, 32], 'cnn1', 'pool1'),    # 32 features for each 5x5 patch
        ([5, 5, 32, 64], 'cnn2', 'pool2'),   # 64 features for each 5x5 patch
    )
    for filter_shape, conv_name, pool_name in conv_stages:
        network = tl.layers.Conv2dLayer(
            network,
            act=tf.nn.relu,
            shape=filter_shape,
            strides=[1, 1, 1, 1],
            padding='SAME',
            name=conv_name,
        )
        network = tl.layers.PoolLayer(
            network,
            ksize=[1, 2, 2, 1],
            strides=[1, 2, 2, 1],
            padding='SAME',
            pool=tf.nn.max_pool,
            name=pool_name,
        )

    network = tl.layers.FlattenLayer(network, name='flatten')
    # NOTE(review): fc1 uses an identity activation, so apart from the dropout
    # in between, fc1 and the output layer compose to one linear map --
    # confirm this is intended rather than a missing relu.
    network = tl.layers.DenseLayer(network, n_units=128, act=tf.identity, name='fc1')

    network = tl.layers.DropoutLayer(network, keep=0.5, name='drop')
    network = tl.layers.DenseLayer(network, n_units=10, act=tf.identity, name='output')

    logits = network.outputs

    loss = tl.cost.cross_entropy(logits, labels, name='xentropy')
    is_hit = tf.equal(tf.argmax(logits, 1), labels)
    accuracy = tf.reduce_mean(tf.cast(is_hit, tf.float32))

    return inputs, labels, logits, loss, accuracy, network

In [26]:
def cnn_text_model(vocab_size=30000, hidden_size=64, doc_length=1024, n_classes=20,
                   filter_sizes=(3, 4, 5), n_filters=100):
    """Assemble a multi-width convolutional text classifier graph.

    Token ids are embedded, reshaped to ``[-1, doc_length, hidden_size, 1]``
    and passed through one convolution branch per entry of ``filter_sizes``.
    Each branch max-pools over every position its VALID convolution produces;
    the pooled branches are concatenated, flattened and fed to a 128-unit
    dense layer, dropout(0.5) and an ``n_classes``-unit output layer.

    Args:
        vocab_size: number of rows of the embedding table.
        hidden_size: embedding width; also the width of every conv filter.
        doc_length: number of token ids per document (second input dimension).
        n_classes: number of units of the output layer.
        filter_sizes: filter heights (tokens covered), one branch per entry.
        n_filters: number of output channels of every convolution branch.

    Returns:
        Tuple ``(x, y_, y, cost, acc, net)``: the int32 input placeholder,
        the int64 label placeholder, the output tensor of the last layer,
        the ``tl.cost.cross_entropy`` cost, the mean-accuracy tensor and the
        final TensorLayer layer object.

    Raises:
        ValueError: if ``filter_sizes`` is empty or contains a height that is
            not in ``1..doc_length``.
    """
    filter_sizes = tuple(filter_sizes)
    if not filter_sizes:
        raise ValueError("filter_sizes must contain at least one filter height")
    if min(filter_sizes) < 1 or max(filter_sizes) > doc_length:
        raise ValueError("every filter height must be between 1 and doc_length (%d), got %r"
                         % (doc_length, filter_sizes))

    x = tf.placeholder(tf.int32, shape=(None, doc_length))
    y_ = tf.placeholder(tf.int64, shape=[None], name='y_')

    net = tl.layers.EmbeddingInputlayer(x, vocab_size, hidden_size, name='embedding')
    net = tl.layers.ReshapeLayer(net, [-1, doc_length, hidden_size, 1], name='reshape')

    pooled_branches = []
    for branch_no, height in enumerate(filter_sizes, start=1):
        # n_filters filters, each spanning `height` tokens x the full embedding width.
        conv = tl.layers.Conv2dLayer(net,
                        act = tf.nn.relu,
                        shape = [height, hidden_size, 1, n_filters],
                        strides=[1, 1, 1, 1],
                        padding='VALID',
                        name ='cnn%d' % branch_no)
        # A VALID conv of this height leaves doc_length - height + 1 positions;
        # pooling over all of them keeps one value per filter. This replaces
        # the hard-coded 1022 / 1021 / 1020 that only matched doc_length=1024.
        pool = tl.layers.PoolLayer(conv,
                        ksize=[1, doc_length - height + 1, 1, 1],
                        strides=[1, 1, 1, 1],
                        padding='VALID',
                        pool = tf.nn.max_pool,
                        name ='pool%d' % branch_no)
        pooled_branches.append(pool)

    net = tl.layers.ConcatLayer(pooled_branches, concat_dim=1, name ='concat_layer')
    net = tl.layers.FlattenLayer(net, name='flatten')
    net = tl.layers.DenseLayer(net, n_units=128, act=tf.identity, name='fc1')
    net = tl.layers.DropoutLayer(net, keep=0.5, name='drop')
    net = tl.layers.DenseLayer(net, n_units=n_classes, act=tf.identity, name='output')

    y = net.outputs

    cost = tl.cost.cross_entropy(y, y_, name='xentropy')
    correct_prediction = tf.equal(tf.argmax(y, 1), y_)
    acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    return x, y_, y, cost, acc, net

In [27]:
trainning_framework(x_train, y_train, x_val, y_val, cnn_text_model)

[TL] EmbeddingInputlayer embedding: (30000, 64)
[TL] ReshapeLayer reshape: (?, 1024, 64, 1)
[TL] Conv2dLayer cnn1: shape:[3, 64, 1, 100] strides:[1, 1, 1, 1] pad:VALID act:relu
[TL] PoolLayer   pool1: ksize:[1, 1022, 1, 1] strides:[1, 1, 1, 1] padding:VALID pool:max_pool
[TL] Conv2dLayer cnn2: shape:[4, 64, 1, 100] strides:[1, 1, 1, 1] pad:VALID act:relu
[TL] PoolLayer   pool2: ksize:[1, 1021, 1, 1] strides:[1, 1, 1, 1] padding:VALID pool:max_pool
[TL] Conv2dLayer cnn3: shape:[5, 64, 1, 100] strides:[1, 1, 1, 1] pad:VALID act:relu
[TL] PoolLayer   pool3: ksize:[1, 1020, 1, 1] strides:[1, 1, 1, 1] padding:VALID pool:max_pool
[TL] ConcatLayer concat_layer: axis: 1
[TL] FlattenLayer flatten: 300
[TL] DenseLayer  fc1: 128 identity
[TL] DropoutLayer drop: keep:0.500000 is_fix:False
[TL] DenseLayer  output: 20 identity
[TL]   param   0: embedding/embeddings:0 (30000, 64)        float32_ref (mean: 5.6605334975756705e-05, median: 9.73232090473175e-05, std: 0.05771826207637787)   
[TL]   param 

Epoch 1 of 20 took 22.886410s
   train loss: 2.934517
   train acc: 0.165163
   val loss: 2.954755
   val acc: 0.126263
Epoch 5 of 20 took 18.299933s
   train loss: 0.856847
   train acc: 0.795648
   val loss: 1.203148
   val acc: 0.659357
Epoch 10 of 20 took 18.636708s
   train loss: 0.150140
   train acc: 0.973461
   val loss: 0.749526
   val acc: 0.782961
Epoch 15 of 20 took 18.670223s
   train loss: 0.018517
   train acc: 0.998142
   val loss: 0.760714
   val acc: 0.801568
Epoch 20 of 20 took 19.424948s
   train loss: 0.003117
   train acc: 0.999469
   val loss: 0.861047
   val acc: 0.803030
