## Loading preprosed data

In [1]:
import numpy as np
train_data = np.load("train_features.npy")
label_data = np.load("train_labels.npy")

In [2]:
train_data.shape, label_data.shape

((164674, 500), (164674,))

In [3]:
NUM_OF_ACIDS = 21
EMBEDDING_SIZE = 32

## Model

### One hot encoding

In [4]:
import tensorflow as tf
tf.__version__

'1.5.0'

In [5]:
NUM_CLASSES = 6

def input_parser(features, label):
    # convert the label to one-hot encoding
    one_hot = tf.one_hot(label, NUM_CLASSES)
    return features, one_hot

In [54]:
tf.reset_default_graph()
sess = tf.InteractiveSession()

In [55]:
tf.keras.backend.set_learning_phase(True)

In [56]:
def identity_block(input_tensor, kernel_size, filters, stage, block):
    """The identity block is the block that has no conv layer at shortcut.
    Arguments:
      input_tensor: input tensor
      kernel_size: default 3, the kernel size of middle conv layer at main path
      filters: list of integers, the filters of 3 conv layer at main path
      stage: integer, current stage label, used for generating layer names
      block: 'a','b'..., current block label, used for generating layer names
    Returns:
      Output tensor for the block.
    """
    filters1, filters2, filters3 = filters
    bn_axis = 2
    
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'

    x = Conv1D(filters1, 1, name=conv_name_base + '2a')(input_tensor)
    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
    x = tf.nn.selu(x)
    
    x = Conv1D(filters2, kernel_size, padding='same', name=conv_name_base + '2b')(x)
    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
    x = tf.nn.selu(x)

    x = Conv1D(filters3, 1, name=conv_name_base + '2c')(x)
    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)

    x = layers.add([x, input_tensor])
    x = tf.nn.selu(x)
    return x


In [57]:
def conv_block(input_tensor, kernel_size, filters, stage, block, strides=2):
    """conv_block is the block that has a conv layer at shortcut.
    Arguments:
      input_tensor: input tensor
      kernel_size: default 3, the kernel size of middle conv layer at main path
      filters: list of integers, the filters of 3 conv layer at main path
      stage: integer, current stage label, used for generating layer names
      block: 'a','b'..., current block label, used for generating layer names
      strides: Tuple of integers.
    Returns:
      Output tensor for the block.
    Note that from stage 3, the first conv layer at main path is with
    strides=(2,2)
    And the shortcut should have strides=(2,2) as well
    """
    filters1, filters2, filters3 = filters
    bn_axis = 2
    
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'

    x = Conv1D(
      filters1, 1, strides=strides, name=conv_name_base + '2a')(input_tensor)
    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
    x = tf.nn.selu(x)

    x = Conv1D(filters2, kernel_size, padding='same', name=conv_name_base + '2b')(x)
    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
    x = tf.nn.selu(x)

    x = Conv1D(filters3, 1, name=conv_name_base + '2c')(x)
    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)

    shortcut = Conv1D(filters3, 1, strides=strides, name=conv_name_base + '1')(input_tensor)
    shortcut = BatchNormalization(axis=bn_axis, name=bn_name_base + '1')(shortcut)

    x = layers.add([x, shortcut])
    x = tf.nn.selu(x)
    return x


Input 

In [58]:
features_placeholder = tf.placeholder(tf.int32, train_data.shape)
labels_placeholder = tf.placeholder(tf.int32, label_data.shape)

dataset = tf.data.Dataset.from_tensor_slices((features_placeholder, labels_placeholder))
dataset = dataset.shuffle(buffer_size=10000, reshuffle_each_iteration=True)
dataset = dataset.batch(64)
iterator = dataset.make_initializable_iterator()

sess.run(iterator.initializer, feed_dict={features_placeholder: train_data, labels_placeholder: label_data})

acid_embeddings = tf.get_variable("acid_embeddings", [NUM_OF_ACIDS, EMBEDDING_SIZE])

In [59]:
from tensorflow.python.keras._impl.keras import layers
from tensorflow.python.keras._impl.keras.layers import *

In [60]:
batch_features, batch_labels = iterator.get_next()
embedded_acids = tf.nn.embedding_lookup(acid_embeddings, batch_features)

In [61]:
embedded_acids_flatten = Flatten()(embedded_acids)

In [62]:
x= Conv1D(64, 7, strides=2, padding='same', name='conv1')(embedded_acids)
x = BatchNormalization(axis=1, name='bn_conv1')(x)
x = tf.nn.selu(x)
x = MaxPooling1D(3, strides=2)(x)

In [63]:
x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=1)
x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')

x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
x = identity_block(x, 3, [128, 128, 512], stage=3, block='b')
x = identity_block(x, 3, [128, 128, 512], stage=3, block='c')
x = identity_block(x, 3, [128, 128, 512], stage=3, block='d')

x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b')
x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c')
x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d')
x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e')
x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f')

# x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
# x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
# x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')

In [64]:
x = Flatten()(x)
x = Dense(6, activation='softmax', name='fc1000')(x)

In [65]:
loss_op = tf.losses.sparse_softmax_cross_entropy(labels=batch_labels, logits=x)
optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
train_op = optimizer.minimize(loss_op, global_step=tf.train.get_global_step())

### Evaluate

In [66]:
# Evaluate model (with test logits, for dropout to be disabled)
correct_pred = tf.equal(tf.argmax(x, 1, output_type=tf.int32), tf.squeeze(batch_labels))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [67]:
tf.global_variables_initializer().run()

## Training

In [68]:
# Training cycle
num_steps = 10000
display_step = 100
for step in range(1, num_steps + 1):

    try:
        # Run optimization
        sess.run(train_op)
    except tf.errors.OutOfRangeError:
        # Reload the iterator when it reaches the end of the dataset
        sess.run(iterator.initializer, feed_dict={features_placeholder: train_data, labels_placeholder: label_data})
        print ("Dataset was resetted")
        sess.run(train_op)

    if step % display_step == 0 or step == 1:
        loss, acc = sess.run([loss_op, accuracy])
        print("Step " + str(step) + ", Minibatch Loss={:.4f}".format(loss) + 
              ", Training Accuracy={:.3f}".format(acc))

print("Optimization Finished!")

Step 1, Minibatch Loss=1.8637, Training Accuracy=0.172
Step 100, Minibatch Loss=1.8248, Training Accuracy=0.219
Step 200, Minibatch Loss=1.8873, Training Accuracy=0.156
Step 300, Minibatch Loss=1.7780, Training Accuracy=0.266
Step 400, Minibatch Loss=1.7780, Training Accuracy=0.266
Step 500, Minibatch Loss=1.7155, Training Accuracy=0.328
Step 600, Minibatch Loss=1.8248, Training Accuracy=0.219
Step 700, Minibatch Loss=1.8873, Training Accuracy=0.156
Step 800, Minibatch Loss=1.7936, Training Accuracy=0.250


KeyboardInterrupt: 