## Loading preprocessed data

In [1]:
# Suffix that selects which set of .npy feature/label files gets loaded.
LEVEL = "Level_4"

In [2]:
import numpy as np
# Load the four arrays for the selected LEVEL in one pass; the file names are
# "<prefix><LEVEL>.npy" and the unpacking order matches the prefix order.
train_data, train_label, val_data, val_label = (
    np.load(prefix + LEVEL + ".npy")
    for prefix in ("train_features_", "train_labels_", "val_features_", "val_labels_")
)

In [3]:
# Shapes of the loaded arrays, in the order: train data, train labels, val data, val labels.
tuple(array.shape for array in (train_data, train_label, val_data, val_label))

((159709, 500), (159709,), (39928, 500), (39928,))

In [4]:
# Number of rows of the `acid_embeddings` table (one row per feature id).
NUM_OF_ACIDS = 21
# Width of each embedding vector.
EMBEDDING_SIZE = 32
# Labels are used as integer class ids, so the class count is the largest id
# found in EITHER split plus one. Looking at the validation labels alone would
# make the logits layer too narrow whenever a training label exceeds the
# validation maximum. Cast to a plain int so it can be used as a layer size.
NUM_CLASSES = int(max(np.amax(train_label), np.amax(val_label))) + 1

In [5]:
NUM_CLASSES

1610

## Model

### One hot encoding

In [6]:
# The cells below use the TF 1.x graph/session API (placeholders, tf.layers,
# explicit sessions); the recorded run reports version 1.5.0.
import tensorflow as tf
tf.__version__

'1.5.0'

In [7]:
# Start from an empty default graph so that re-running the graph-building cells
# does not add duplicate ops/variables on top of a previous build.
tf.reset_default_graph()
# Session shared by every later cell. The later `.run()` call on the variable
# initializer passes no session, so it presumably relies on this one being the
# default session — confirm.
sess = tf.InteractiveSession()

Input 

In [8]:
# Placeholders through which whole arrays are fed when the iterator is
# (re-)initialized; `is_training` is fed on every run call.
features_placeholder = tf.placeholder(tf.int32, shape=(None, train_data.shape[1]))
labels_placeholder = tf.placeholder(tf.int32, shape=(None,))
is_training = tf.placeholder(tf.bool, name="is_training")

# Slice the fed arrays into examples, shuffle them and group them into batches.
dataset = (
    tf.data.Dataset.from_tensor_slices((features_placeholder, labels_placeholder))
    .shuffle(buffer_size=10000, reshuffle_each_iteration=True)
    .batch(64)
)
iterator = dataset.make_initializable_iterator()

# Point the iterator at the training arrays.
sess.run(
    iterator.initializer,
    feed_dict={
        features_placeholder: train_data,
        labels_placeholder: train_label,
    },
)

# Trainable lookup table with one EMBEDDING_SIZE-wide row per feature id.
acid_embeddings = tf.get_variable(
    name="acid_embeddings",
    shape=[NUM_OF_ACIDS, EMBEDDING_SIZE],
)

In [9]:
# Tensors holding the next (features, labels) batch produced by the iterator.
batch_features, batch_labels = iterator.get_next()
# Replace every integer id in the batch by its row of the embedding table.
embedded_acids = tf.nn.embedding_lookup(
    params=acid_embeddings,
    ids=batch_features,
)

In [10]:
# Flattened copy of the embedded batch.
# NOTE(review): no other visible cell references this tensor (the conv stack
# consumes `embedded_acids` directly) — confirm whether it is still needed.
embedded_acids_flatten = tf.layers.flatten(embedded_acids)

In [11]:
# Two conv/pool stages over the embedded sequence, one dense layer with
# dropout, then a linear layer producing one logit per class.

# Convolutional Layer #1
conv1 = tf.layers.conv1d(
  inputs=embedded_acids,
  filters=32,
  kernel_size=5,
  padding="same",
  activation=tf.nn.selu)

# Pooling Layer #1
pool1 = tf.layers.max_pooling1d(inputs=conv1, pool_size=2, strides=2)

# Convolutional Layer #2 and Pooling Layer #2
conv2 = tf.layers.conv1d(
  inputs=pool1,
  filters=64,
  kernel_size=5,
  padding="same",
  activation=tf.nn.selu)
pool2 = tf.layers.max_pooling1d(inputs=conv2, pool_size=2, strides=2)

# Dense Layer
pool2_flat = tf.layers.flatten(pool2)
dense = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.selu)
# Dropout is only active when `is_training` is fed as True.
dropout = tf.layers.dropout(inputs=dense, rate=0.4, training=is_training)

# Logits Layer
# One logit per class: the sparse cross-entropy loss indexes the logits with the
# integer labels, so the width has to be NUM_CLASSES. A hard-coded width of 6
# is too small for the label range and only worked when a later cell rebound
# `x` out of order.
x = tf.layers.dense(inputs=dropout, units=NUM_CLASSES)

In [23]:
# Cross-entropy between the integer class labels of the batch and the logits `x`.
loss_op = tf.losses.sparse_softmax_cross_entropy(labels=batch_labels, logits=x)
optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
# NOTE(review): no global-step variable is created in the visible cells, so
# get_global_step() presumably yields None here and no step counter is
# tracked by the optimizer — confirm.
train_op = optimizer.minimize(loss_op, global_step=tf.train.get_global_step())

### Evaluate

In [24]:
# Evaluate model (with test logits, for dropout to be disabled)
correct_pred = tf.equal(tf.argmax(x, 1, output_type=tf.int32), tf.squeeze(batch_labels))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [25]:
# Initialize every variable created so far. `.run()` is called without an
# explicit session (presumably relying on `sess` being the default — confirm).
tf.global_variables_initializer().run()
# The saver is built from the global variables that exist at this point.
saver = tf.train.Saver(tf.global_variables())

### Training helpers

In [15]:
def print_progress(step, loss, acc):
    """Print a one-line progress report.

    Args:
        step: step identifier; converted with ``str`` so both integers and
            text tags can be passed.
        loss: loss value, printed with four decimals.
        acc: accuracy value, printed with three decimals.
    """
    template = "Step {}, Minibatch Loss={:.4f}, Training Accuracy={:.3f}"
    message = template.format(str(step), loss, acc)
    print(message)

In [16]:
def validation(sess, val_data, val_label):
    """Run one pass over the validation arrays and report mean loss/accuracy.

    The shared `iterator` is re-initialized with the validation arrays, so the
    caller has to re-initialize it with training data before training again.

    Args:
        sess: session used to run the graph.
        val_data: array fed to `features_placeholder`.
        val_label: array fed to `labels_placeholder`.

    Returns:
        Tuple ``(loss_avg, acc_avg)`` holding the unweighted means over all
        batches, or ``(None, None)`` when the iterator produced no batch.
    """
    losses = []
    accuracies = []
    sess.run(iterator.initializer, feed_dict={features_placeholder: val_data, labels_placeholder: val_label})
    while True:
        try:
            # Forward pass only: the train op is not fetched and dropout is off.
            loss, acc = sess.run([loss_op, accuracy], feed_dict={is_training: False})
        except tf.errors.OutOfRangeError:
            print ("Validation dataset is over")
            break
        losses.append(loss)
        accuracies.append(acc)

    # Without this guard an empty validation set would end in a division by zero.
    if not losses:
        print ("Validation dataset is empty, nothing to report")
        return None, None

    loss_avg = sum(losses)/len(losses)
    acc_avg = sum(accuracies)/len(accuracies)
    print_progress("VALIDATION_STEP", loss_avg, acc_avg)
    return loss_avg, acc_avg

# Training

In [26]:
# Training cycle
# Each iteration runs one optimization step. When the iterator is exhausted
# the epoch is closed: validate, count the epoch and re-feed the training data.
NUM_EPOCH=5
DISPLAY_STEP = 100
epoch = 0
step = 0
while epoch < NUM_EPOCH:
    try:
        # Fetch loss and accuracy in the SAME run as the train op. A separate
        # run call for reporting would pull (and waste) an extra batch from the
        # iterator and could raise OutOfRangeError outside of this try block.
        _train_result, loss, acc = sess.run(
            [train_op, loss_op, accuracy], feed_dict={is_training: True})
    except tf.errors.OutOfRangeError:
        print ("Reloading the iterator as epoch is finished")
        validation(sess, val_data, val_label)
        epoch = epoch + 1
        # validation() left the iterator pointing at the validation arrays.
        sess.run(iterator.initializer, feed_dict={features_placeholder: train_data, labels_placeholder: train_label})
        # No step was taken in this iteration, so there is nothing to report.
        continue

    step = step + 1
    if step % DISPLAY_STEP == 0 or step == 1:
        print_progress(step, loss, acc)

print("Training is Finished!")

Step 1, Minibatch Loss=7.8345, Training Accuracy=0.031
Step 100, Minibatch Loss=2.6947, Training Accuracy=0.578
Step 200, Minibatch Loss=2.0440, Training Accuracy=0.656
Step 300, Minibatch Loss=1.1548, Training Accuracy=0.797
Step 400, Minibatch Loss=1.3773, Training Accuracy=0.781
Step 500, Minibatch Loss=1.3851, Training Accuracy=0.766
Step 600, Minibatch Loss=0.6354, Training Accuracy=0.891
Step 700, Minibatch Loss=1.1600, Training Accuracy=0.812
Step 800, Minibatch Loss=1.5294, Training Accuracy=0.797
Step 900, Minibatch Loss=0.8012, Training Accuracy=0.859
Step 1000, Minibatch Loss=1.1610, Training Accuracy=0.812
Step 1100, Minibatch Loss=0.3635, Training Accuracy=0.891
Step 1200, Minibatch Loss=1.1254, Training Accuracy=0.797
Step 1300, Minibatch Loss=0.5955, Training Accuracy=0.906
Step 1400, Minibatch Loss=0.9833, Training Accuracy=0.828
Step 1500, Minibatch Loss=0.6447, Training Accuracy=0.891
Step 1600, Minibatch Loss=1.2576, Training Accuracy=0.828
Step 1700, Minibatch Loss=

In [27]:
# Checkpoint path handed to save_weights() and restore_weights().
PATH = "saved_models/cnn_level4/version1.ckpt"

## Saving

In [28]:
 def save_weights(saver, sess, path):
     """Save the session's variables to `path` and print where they went.

     The directory part of `path` is created first when it is missing,
     because the saver cannot write into a directory that does not exist.

     Args:
         saver: object whose ``save(sess, path)`` writes the checkpoint and
             returns the path it used.
         sess: session holding the variable values.
         path: checkpoint path; may be a bare file name.
     """
     import os

     directory = os.path.dirname(path)
     # A bare file name has no directory part; nothing to create then.
     if directory and not os.path.isdir(directory):
         os.makedirs(directory)
     save_path = saver.save(sess, path)
     print("Model saved in path: %s" % save_path)

In [29]:
# Write the current variable values of `sess` to the checkpoint at PATH.
save_weights(saver, sess, PATH)

Model saved in path: saved_models/cnn_level4/version1.ckpt


## Restoring

In [20]:
def restore_weights(saver, sess, path):
    """Load the checkpoint stored at `path` into `sess` and print a confirmation.

    Args:
        saver: object whose ``restore(sess, path)`` performs the loading.
        sess: session that receives the restored values.
        path: checkpoint path to read from.
    """
    saver.restore(sess, path)
    confirmation = "Model restored."
    print(confirmation)

In [21]:
# Load the checkpoint at PATH back into `sess`.
# NOTE(review): the recorded output of this cell mentions a cnn_level3
# checkpoint while PATH points at cnn_level4, so the output looks like it comes
# from an earlier run — confirm and re-run.
restore_weights(saver, sess, PATH)

INFO:tensorflow:Restoring parameters from saved_models/cnn_level3/version1.ckpt
Model restored.


In [22]:
# NOTE(review): this cell carries execution count In [22], i.e. in the recorded
# session it ran before the loss cell (In [23]), so it looks like the definition
# of `x` that the recorded training actually used. Run top-to-bottom it executes
# after training and only adds a fresh dense layer that was created after the
# variable initializer and the saver — confirm whether it can be removed.
x = tf.layers.dense(inputs=dropout, units=NUM_CLASSES)