### Temporal Convolutional Networks Overview

![TCNs](https://cdn-images-1.medium.com/max/1000/1*1cK-UEWHGaZLM-4ITCeqdQ.png)

# Global Parameters

In [17]:
LEVEL="Level_4"

## Input

In [18]:
import tensorflow as tf
import numpy as np
print(tf.__version__)

1.7.0


In [19]:
import pandas as pd
pretrained_emb = pd.read_csv("../../data/protVec_100d_3grams.csv", delimiter="\t")

In [20]:
pretrained_emb.shape

(9048, 101)

In [21]:
pretrained_emb.head()

Unnamed: 0,words,d1,d2,d3,d4,d5,d6,d7,d8,d9,...,d91,d92,d93,d94,d95,d96,d97,d98,d99,d100
0,AAA,-0.17406,-0.095756,0.059515,0.039673,-0.375934,-0.115415,0.090725,0.173422,0.29252,...,0.244482,0.015974,0.012903,0.137528,0.13814,0.005474,0.070719,-0.164084,-0.179274,0.184899
1,ALA,-0.114085,-0.093288,0.1558,-0.037351,-0.121446,0.084037,0.023819,0.093442,0.143256,...,0.075584,-0.139661,0.034863,0.056078,0.028975,-0.012233,0.059669,0.037811,-0.172493,0.074655
2,LLL,-0.075594,-0.100834,-0.046616,-0.20898,-0.008596,-0.038612,-0.04936,0.06072,-0.062662,...,0.174677,-0.175961,-0.193242,-0.072965,-0.07556,0.158286,-0.026378,0.037155,-0.176038,0.319293
3,LAA,-0.137546,-0.135425,0.121566,-0.038295,-0.212129,0.040009,0.078545,0.029837,0.138343,...,0.133947,-0.156484,-0.048541,0.141848,0.081842,0.070573,0.006927,0.035281,-0.138971,0.105997
4,AAL,-0.156112,-0.133524,0.114426,-0.020264,-0.058513,0.057005,0.076881,0.054781,0.129436,...,0.154597,-0.05044,0.054866,0.066185,0.017498,0.001773,-0.083944,-0.003867,-0.106367,0.070706


In [42]:
embedding_weights_array = pretrained_emb.drop("words", axis = 1).as_matrix()

In [23]:
train_data = np.load("../../data/emb_train_features_"+LEVEL+".npy")
train_label = np.load("../../data/emb_train_labels_"+LEVEL+".npy")
val_data = np.load("../../data/emb_val_features_"+LEVEL+".npy")
val_label = np.load("../../data/emb_val_labels_"+LEVEL+".npy")

## Building TCNs

###  Causal Convolution

###  Spatial Dropout

Reference: https://stats.stackexchange.com/questions/282282/how-is-spatial-dropout-in-2d-implemented

Actually, simply setting noise_shape in tf.layers.Dropout will do the trick.

In [24]:
tf.reset_default_graph()
with tf.Graph().as_default() as g:
    x = tf.random_normal((32, 4, 10)) # (batch_size, channel, length)
    dropout = tf.layers.Dropout(0.5, noise_shape=[x.shape[0], x.shape[1], tf.constant(1)])
    output = dropout(x, training=True)
    init = tf.global_variables_initializer()
    
with tf.Session(graph=g) as sess:
    # Run the initializer
    sess.run(init)
    res = sess.run(output)
    print(res.shape)   
    print(res[0, :, :])
    print(res[1, :, :])

(32, 4, 10)
[[ 0.          0.         -0.         -0.         -0.         -0.
   0.         -0.         -0.          0.        ]
 [ 0.          0.         -0.          0.         -0.          0.
   0.          0.         -0.         -0.        ]
 [ 1.4055752  -1.7585988  -3.4812396   2.007582   -3.5614364  -2.109531
  -2.0887272  -4.7906737  -2.1057696   0.18137601]
 [ 1.4158698  -3.69789    -1.4131063  -2.2818372  -1.8009049  -0.95027846
   2.6366224   2.420438   -0.96060276  1.865662  ]]
[[ 0.9109896  -0.40115687 -3.40389    -2.098538   -2.512576   -3.6847801
   0.6041635   0.12205655  1.3454405   0.8392874 ]
 [ 0.         -0.         -0.         -0.         -0.          0.
   0.          0.         -0.          0.        ]
 [ 0.         -0.          0.         -0.         -0.          0.
  -0.         -0.         -0.          0.        ]
 [-0.          0.          0.         -0.          0.          0.
   0.          0.          0.          0.        ]]


### Temporal blocks

Note: `tf.contrib.layers.layer_norm` only supports `channels_last`.

In [25]:
# Redefining CausalConv1D to simplify its return values
class CausalConv1D(tf.layers.Conv1D):
    def __init__(self, filters,
               kernel_size,
               strides=1,
               dilation_rate=1,
               activation=None,
               use_bias=True,
               kernel_initializer=None,
               bias_initializer=tf.zeros_initializer(),
               kernel_regularizer=None,
               bias_regularizer=None,
               activity_regularizer=None,
               kernel_constraint=None,
               bias_constraint=None,
               trainable=True,
               name=None,
               **kwargs):
        super(CausalConv1D, self).__init__(
            filters=filters,
            kernel_size=kernel_size,
            strides=strides,
            padding='valid',
            data_format='channels_last',
            dilation_rate=dilation_rate,
            activation=activation,
            use_bias=use_bias,
            kernel_initializer=kernel_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer,
            activity_regularizer=activity_regularizer,
            kernel_constraint=kernel_constraint,
            bias_constraint=bias_constraint,
            trainable=trainable,
            name=name, **kwargs
        )
       
    def call(self, inputs):
        padding = (self.kernel_size[0] - 1) * self.dilation_rate[0]
        inputs = tf.pad(inputs, tf.constant([(0, 0,), (1, 0), (0, 0)]) * padding)
        return super(CausalConv1D, self).call(inputs)

In [26]:
class TemporalBlock(tf.layers.Layer):
    def __init__(self, n_outputs, kernel_size, strides, dilation_rate, dropout=0.2, 
                 trainable=True, name=None, dtype=None, 
                 activity_regularizer=None, **kwargs):
        super(TemporalBlock, self).__init__(
            trainable=trainable, dtype=dtype,
            activity_regularizer=activity_regularizer,
            name=name, **kwargs
        )        
        self.dropout = dropout
        self.n_outputs = n_outputs
        self.conv1 = CausalConv1D(
            n_outputs, kernel_size, strides=strides, 
            dilation_rate=dilation_rate, activation=tf.nn.relu, 
            name="conv1")
        self.conv2 = CausalConv1D(
            n_outputs, kernel_size, strides=strides, 
            dilation_rate=dilation_rate, activation=tf.nn.relu, 
            name="conv2")
        self.down_sample = None

    
    def build(self, input_shape):
        channel_dim = 2
        self.dropout1 = tf.layers.Dropout(self.dropout, [tf.constant(1), tf.constant(1), tf.constant(self.n_outputs)])
        self.dropout2 = tf.layers.Dropout(self.dropout, [tf.constant(1), tf.constant(1), tf.constant(self.n_outputs)])
        if input_shape[channel_dim] != self.n_outputs:
            # self.down_sample = tf.layers.Conv1D(
            #     self.n_outputs, kernel_size=1, 
            #     activation=None, data_format="channels_last", padding="valid")
            self.down_sample = tf.layers.Dense(self.n_outputs, activation=None)
    
    def call(self, inputs, training=True):
        x = self.conv1(inputs)
        x = tf.contrib.layers.layer_norm(x)
        x = self.dropout1(x, training=training)
        x = self.conv2(x)
        x = tf.contrib.layers.layer_norm(x)
        x = self.dropout2(x, training=training)
        if self.down_sample is not None:
            inputs = self.down_sample(inputs)
        return tf.nn.relu(x + inputs)

In [27]:
tf.reset_default_graph()
with tf.Graph().as_default() as g:
    x = tf.random_normal((32, 10, 4)) # (batch_size, length, channel)
    tblock = TemporalBlock(8, 2, 1, 4) #n_outputs, kernel_size, strides, dilation_rate
    output = tblock(x, training=tf.constant(True))
    init = tf.global_variables_initializer()
    
with tf.Session(graph=g) as sess:
    # Run the initializer
    sess.run(init)
    res = sess.run(output)
    print(res.shape)   
    print(res[0, :, 0])
    print(res[1, :, 1])

Instructions for updating:
`NHWC` for data_format is deprecated, use `NWC` instead
Instructions for updating:
Use the retry module or similar alternatives.
(32, 10, 8)
[0.         0.00986617 1.1228504  0.         0.         0.08626242
 0.23798266 0.         0.162916   0.2332388 ]
[0.5366014  0.2239842  0.81678814 0.         0.75807244 0.95753396
 0.         0.09215485 0.         0.42887744]


### Temporal convolutional networks

In [28]:
class TemporalConvNet(tf.layers.Layer):
    def __init__(self, num_channels, kernel_size=2, dropout=0.2,
                 trainable=True, name=None, dtype=None, 
                 activity_regularizer=None, **kwargs):
        super(TemporalConvNet, self).__init__(
            trainable=trainable, dtype=dtype,
            activity_regularizer=activity_regularizer,
            name=name, **kwargs
        )
        self.layers = []
        num_levels = len(num_channels)
        for i in range(num_levels):
            dilation_size = 2 ** i
            out_channels = num_channels[i]
            self.layers.append(
                TemporalBlock(out_channels, kernel_size, strides=1, dilation_rate=dilation_size,
                              dropout=dropout, name="tblock_{}".format(i))
            )
    
    def call(self, inputs, training=True):
        outputs = inputs
        for layer in self.layers:
            outputs = layer(outputs, training=training)
        return outputs

In [29]:
tf.reset_default_graph()
with tf.Graph().as_default() as g:
    x = tf.random_normal((32, 10, 4)) # (batch_size, length, channel)
    tcn = TemporalConvNet([8, 8, 8, 8], 2, 0.25)
    output = tcn(x, training=tf.constant(True))
    init = tf.global_variables_initializer()
    
with tf.Session(graph=g) as sess:
    # Run the initializer
    sess.run(init)
    res = sess.run(output)
    print(res.shape)   
    print(res[0, :, 0])
    print(res[1, :, 1])

(32, 10, 8)
[0.95289564 0.         0.         2.019168   3.286333   2.2514975
 9.05715    0.         3.7902834  0.80918753]
[0.32888985 1.3946958  0.         0.20940956 0.         0.
 4.659095   0.         4.667108   3.3278246 ]


In [30]:
tf.reset_default_graph()
g = tf.Graph()
with g.as_default():
    Xinput = tf.placeholder(tf.float32, shape=[None, 10, 4])
    tcn = TemporalConvNet([8, 8, 8, 8], 2, 0.25)
    output = tcn(Xinput, training=tf.constant(True))
    init = tf.global_variables_initializer()
    
with tf.Session(graph=g) as sess:
    # Run the initializer
    sess.run(init)
    res = sess.run(output, {Xinput: np.random.randn(32, 10, 4)})
    print(res.shape)   
    print(res[0, :, 0])
    print(res[1, :, 1])

(32, 10, 8)
[0.         0.         2.273004   1.6866876  0.28753886 0.
 6.327194   0.         2.7823796  2.7996778 ]
[1.2137653 0.        0.        0.        0.        0.        0.3347146
 0.        0.        3.073979 ]


# Model

In [54]:
# Training Parameters
learning_rate = 0.001
batch_size = 128
batches_per_epoch = int(train_data.shape[0]/batch_size)+1
num_epochs = 12
print("Number of epochs: {} with batches per epoch: {}".format(num_epochs, batches_per_epoch))

# Network Parameters
sequence_length=train_data.shape[1]
num_classes = np.amax(val_label, axis=0)+1 
num_of_kmer = len(embedding_weights_array)
embedding_size = len(embedding_weights_array[0])

dropout = 0.1
kernel_size = 3
levels = 6
nhid = 64 # hidden layer num of features

Number of epochs: 12 with batches per epoch: 1245


In [55]:
tf.reset_default_graph()
graph = tf.Graph()
with graph.as_default():
    tf.set_random_seed(10)
    
    with tf.variable_scope('input'):
        sequences = tf.placeholder(tf.int32, [None, sequence_length], name='sequences')
        labels = tf.placeholder(tf.int32, (None,))
        is_training = tf.placeholder(tf.bool, name='is_train')

        dataset = (tf.data.Dataset.from_tensor_slices((sequences, labels))
                   .shuffle(buffer_size=10000, reshuffle_each_iteration=True)
                   .apply(tf.contrib.data.batch_and_drop_remainder(batch_size)))
    
        iterator = dataset.make_initializable_iterator()
        
    
    with tf.variable_scope('embedding'):
        weights_initializer = tf.constant_initializer(embedding_weights_array)
        embedding_weights = tf.get_variable(
            name='embedding_weights', 
            shape=(num_of_kmer, embedding_size), 
            initializer=weights_initializer,
            trainable=False)
#         acid_embeddings = tf.get_variable("acid_embeddings", [num_of_acids, embedding_size])

        batch_sequences, batch_labels = iterator.get_next()

        embedded_sequences = tf.nn.embedding_lookup(embedding_weights, batch_sequences)
#         embedded_sequences = tf.nn.embedding_lookup(acid_embeddings, batch_sequences)
        embedded_sequences = tf.reshape(embedded_sequences, 
                                             shape=[-1, sequence_length, embedding_size], 
                                             name='embedded_real_sequences')    
    # Define weights
    with tf.variable_scope('tcn'):
        logits = tf.layers.dense(TemporalConvNet([nhid] * levels, kernel_size, 
                                                 dropout)(embedded_sequences, training=is_training)[:, -1, :],
            num_classes, activation=None, kernel_initializer=tf.orthogonal_initializer())
   

    # Define loss and optimizer
    with tf.name_scope("loss_op"):
        loss_op = tf.reduce_mean(tf.losses.sparse_softmax_cross_entropy(labels=batch_labels, logits=logits))
        tf.summary.scalar("loss_op", loss_op)
    
    with tf.name_scope("optimizer"):
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        train_op = optimizer.minimize(loss_op)

    with tf.name_scope("accuracy"):
        prediction = tf.nn.softmax(logits)
        correct_pred = tf.equal(tf.argmax(prediction, 1, output_type=tf.int32), tf.squeeze(batch_labels))
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
        tf.summary.scalar("accuracy", accuracy)
    
    summ = tf.summary.merge_all()
    
     # Initialize the variables (i.e. assign their default value)
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    print("All parameters:", np.sum([np.product([xi.value for xi in x.get_shape()]) for x in tf.global_variables()]))
    print("Trainable parameters:", np.sum([np.product([xi.value for xi in x.get_shape()]) for x in tf.trainable_variables()]))
    [ print("{}{}".format(x.name, x.shape)) for x in tf.trainable_variables() if "LayerNorm" not in x.name]

All parameters: 1644785.0
Trainable parameters: 246661
tcn/temporal_conv_net/tblock_0/conv1/kernel:0(3, 100, 64)
tcn/temporal_conv_net/tblock_0/conv1/bias:0(64,)
tcn/temporal_conv_net/tblock_0/conv2/kernel:0(3, 64, 64)
tcn/temporal_conv_net/tblock_0/conv2/bias:0(64,)
tcn/temporal_conv_net/tblock_0/dense/kernel:0(100, 64)
tcn/temporal_conv_net/tblock_0/dense/bias:0(64,)
tcn/temporal_conv_net/tblock_1/conv1/kernel:0(3, 64, 64)
tcn/temporal_conv_net/tblock_1/conv1/bias:0(64,)
tcn/temporal_conv_net/tblock_1/conv2/kernel:0(3, 64, 64)
tcn/temporal_conv_net/tblock_1/conv2/bias:0(64,)
tcn/temporal_conv_net/tblock_2/conv1/kernel:0(3, 64, 64)
tcn/temporal_conv_net/tblock_2/conv1/bias:0(64,)
tcn/temporal_conv_net/tblock_2/conv2/kernel:0(3, 64, 64)
tcn/temporal_conv_net/tblock_2/conv2/bias:0(64,)
tcn/temporal_conv_net/tblock_3/conv1/kernel:0(3, 64, 64)
tcn/temporal_conv_net/tblock_3/conv1/bias:0(64,)
tcn/temporal_conv_net/tblock_3/conv2/kernel:0(3, 64, 64)
tcn/temporal_conv_net/tblock_3/conv2/bias

In [56]:
def print_progress(step, loss, acc):
    print("Step {}, Loss={:.4f}, Accuracy={:.3f}".format(str(step), loss, acc))

In [57]:
def validation(epoch):    
    # Calculate batch loss and accuracy
    losses = []
    accuracies = []
    sess.run(iterator.initializer, feed_dict={sequences: val_data, labels: val_label})
    while True:
        try:
            # Run optimization
            loss, acc = sess.run([loss_op, accuracy], feed_dict={is_training: False})
            losses.append(loss)
            accuracies.append(acc)
        except tf.errors.OutOfRangeError:
            break
    loss_avg = sum(losses)/len(losses)
    acc_avg = sum(accuracies)/len(accuracies)
    print_progress("VALIDATION for epoch {}".format(epoch), loss_avg, acc_avg)
    return acc_avg

## Start training

In [58]:
from pathlib import Path
import random 
from datetime import datetime
path = "../../logs/tcn_sequence/"
log_dir = "{}{}".format(path, datetime.now().strftime("%Y%m%d_%H%M"))
Path(log_dir).mkdir(exist_ok=True, parents=True)
tb_writer = tf.summary.FileWriter(log_dir, graph)
config = tf.ConfigProto()
config.gpu_options.allow_growth = False
best_val_acc = 0.8
with tf.Session(graph=graph) as sess:
    # Run the initializer
    epoch, step = 0, 0
    sess.run([init, iterator.initializer], feed_dict={sequences: train_data, labels: train_label})
    while epoch < num_epochs:
        try: 
            sess.run(train_op, feed_dict={is_training: True})
            step = step +1 
            if step % int(batches_per_epoch/4) == 0 or step == 1:
                loss, acc = sess.run([loss_op, accuracy], feed_dict={is_training: True})
                print_progress(step, loss, acc)
                [train_accuracy, s] = sess.run([accuracy, summ], feed_dict={is_training: True})
                tb_writer.add_summary(s, step)
        except tf.errors.OutOfRangeError:
            path
            epoch = epoch + 1
            val_acc = validation(epoch)           
            
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                save_path = saver.save(sess, "{}{}".format(path, "v2"))
                print("Model saved in path: %s" % save_path)
            sess.run(iterator.initializer, feed_dict={sequences: train_data, labels: train_label})
    print("Optimization Finished!")
    

Step 1, Loss=7.2150, Accuracy=0.008
Step 311, Loss=5.6659, Accuracy=0.047
Step 622, Loss=4.7612, Accuracy=0.078
Step 933, Loss=4.2461, Accuracy=0.180
Step VALIDATION for epoch 1, Loss=4.2032, Accuracy=0.173
Step 1244, Loss=4.3437, Accuracy=0.164
Step 1555, Loss=4.0910, Accuracy=0.188
Step 1866, Loss=4.0806, Accuracy=0.211
Step 2177, Loss=3.8330, Accuracy=0.188
Step VALIDATION for epoch 2, Loss=3.4746, Accuracy=0.277
Step 2488, Loss=3.5093, Accuracy=0.273
Step 2799, Loss=4.6315, Accuracy=0.180
Step 3110, Loss=3.5563, Accuracy=0.242
Step 3421, Loss=3.3471, Accuracy=0.289
Step VALIDATION for epoch 3, Loss=2.9640, Accuracy=0.367
Step 3732, Loss=3.2815, Accuracy=0.289
Step 4043, Loss=3.1671, Accuracy=0.312
Step 4354, Loss=2.9249, Accuracy=0.383
Step 4665, Loss=3.1327, Accuracy=0.273
Step VALIDATION for epoch 4, Loss=2.8107, Accuracy=0.400
Step 4976, Loss=2.8315, Accuracy=0.336
Step 5287, Loss=2.8589, Accuracy=0.375
Step 5598, Loss=3.2706, Accuracy=0.305
Step 5909, Loss=2.3473, Accuracy=0.49

# Validation with new sequences

In [37]:
import pandas as pd
data = pd.read_csv("..//..//data//test_sequences.csv", sep='\t', skipinitialspace=True)
data["Sequence"] = data.Sequence.str.ljust(500, '0')
letterToIndex = {'0': 0, 'A': 1, 'C': 2, 'D': 3, 'E': 4, 'F': 5, 'G': 6, 'H': 7, 'I': 8, 'K': 9, 'L': 10, 'M': 11, 'N': 12,
                 'P': 13, 'Q': 14, 'R': 15, 'S': 16, 'T': 17, 'V': 18, 'W': 19, 'Y': 20}
data["Sequence_vector"] = [[letterToIndex[char] for char in val ] for index, val in data.Sequence.iteritems()]
test_data= np.asarray([ np.asarray(element) for element in data["Sequence_vector"].values])
test_data_for_tensorflow = np.append(test_data, np.zeros((batch_size-len(test_data), sequence_length)), axis=0).astype(int)
test_data_for_tensorflow.shape

(128, 500)

In [38]:
test_elements = np.array([1,2,3,4,5,6])
label_for_tensorflow = np.append(test_elements, np.zeros((batch_size-len(test_elements))), axis=0).astype(int)
label_for_tensorflow.shape

(128,)

In [39]:
s = tf.Session(graph=graph)
s.run(init)
saver.restore(s, "../logs/tcn_sequence/v1")

INFO:tensorflow:Restoring parameters from ../logs/tcn_sequence/v1


In [40]:
np.set_printoptions(precision=8)
np.set_printoptions(suppress=True)

s.run(iterator.initializer, feed_dict={sequences: test_data_for_tensorflow, labels: label_for_tensorflow})
preds, ls = s.run([prediction, batch_labels], feed_dict={is_training: False})
count = 0
selected_ls = ls.take(np.argwhere(ls > 0))
selected_preds = preds.take(np.argwhere(ls > 0), axis=0)
for i in selected_ls.argsort(axis=0):
    if count == 0:
        print("\n\r")
        print("Oxidoreductases Transferases Hydrolases Lyases Isomerases Ligases")
    print(selected_preds[i])
    print(np.argmax(selected_preds[i]))
    count = count + 1

#     print( p["classes"]+1)



Oxidoreductases Transferases Hydrolases Lyases Isomerases Ligases
[[[0.         0.         0.05265274 ... 0.00000001 0.00000001 0.        ]]]
23
[[[0.        0.        0.0001981 ... 0.        0.        0.       ]]]
695
[[[0.         0.         0.21845001 ... 0.00000001 0.00000011 0.        ]]]
23
[[[0.         0.         0.00677673 ... 0.         0.         0.        ]]]
695
[[[0.         0.         0.06336619 ... 0.00000002 0.         0.        ]]]
23
[[[0.         0.         0.00001192 ... 0.         0.         0.        ]]]
101


In [31]:
selected_ls.argsort(axis=0)

array([[0],
       [4],
       [2],
       [1],
       [3]])

In [32]:
selected_ls

array([[1],
       [4],
       [3],
       [5],
       [2]], dtype=int32)

In [128]:
ls.sort()
ls

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5], dtype=int32)

In [130]:
np.argwhere(ls > 0)

array([[123],
       [124],
       [125],
       [126],
       [127]])