## 1. Load libraries

In [None]:
# make sure you have these libraries

# Data
import pandas as pd

# Math
import numpy as np

# ML
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from tensorflow.keras import layers
from sklearn.utils import shuffle
from sklearn.model_selection import KFold

def seq2vec(string, outlen, vocab, pad_value=20):
    """Encode a string as a fixed-length vector of integer vocabulary indices.

    Each character of ``string`` is looked up in ``vocab``; the result is
    right-padded with ``pad_value`` until it has exactly ``outlen`` entries.

    Args:
        string: Input sequence; must not be longer than ``outlen``.
        outlen: Length of the returned vector.
        vocab: Mapping from each character to its integer index.
        pad_value: Index used to fill the positions after the end of the
            sequence. Defaults to 20, the value this function has always
            padded with.

    Returns:
        A 1-D integer ``numpy.ndarray`` of length ``outlen``.

    Raises:
        ValueError: If ``string`` is longer than ``outlen``.
        KeyError: If ``string`` contains a character missing from ``vocab``.
    """
    if len(string) > outlen:
        raise ValueError(
            "sequence of length {} does not fit in outlen={}".format(len(string), outlen)
        )
    # An explicit integer dtype keeps the result integral even for an empty
    # string (numpy would otherwise infer float64 from an empty list).
    codes = np.array([vocab[amino_acid] for amino_acid in string], dtype=int)
    return np.pad(codes, (0, outlen - len(codes)), constant_values=pad_value)

## 2. Load the data and transform it

In [None]:
# Load the sequence table and the amino-acid vocabulary.
data = pd.read_csv("seq_data.csv")
vfile = pd.read_csv("vocab.csv", index_col=None, skiprows=0)
# Map every one-letter code to its row position in vocab.csv.
vocab = {code: i for i, code in enumerate(vfile["One-Letter Code"])}

# Define constants and set up the optimizer.
Tx = 84  # every sequence is padded to this length (a fixed length is a convenience, not a requirement)
nbatch = 320
nepoch = 1  # 1 is enough to check that the pipeline runs; raise it (e.g. to 100) for real training
adam = Adam(learning_rate=0.001)

# Encode every sequence as integer indices, then one-hot encode the whole
# matrix in a single call. Encoding all rows together gives every sequence
# the same one-hot width; encoding them one by one lets the width depend on
# the largest index present in each individual sequence.
encoded = np.stack([seq2vec(seq, Tx, vocab) for seq in data["seq"]])
vector = to_categorical(encoded)

# Label the class according to the summed scores: 0 when the sum is
# positive, 1 otherwise (rows whose sum is NaN keep a NaN label).
score_sum = data["binding score"] + data["digest score"]
data.loc[score_sum > 0, "label"] = 0
data.loc[score_sum <= 0, "label"] = 1

# Create training/testing sets (80-20 split) from a seeded shuffle so the
# split is reproducible.
sub_data = data.iloc[:]
n_train = len(sub_data) * 8 // 10
index = np.arange(len(sub_data))
np.random.seed(1)
np.random.shuffle(index)

train_index = index[:n_train]
test_index = index[n_train:]

# Select labels by position so they always line up with the rows of
# `vector`, whatever index the DataFrame carries.
labels = data["label"].to_numpy()
trainX = vector[train_index]
trainY = labels[train_index]
testX = vector[test_index]
testY = labels[test_index]

## 3. Build the computation graph (LSTM with attention mechanism)
##### This is the best model I have tested.
##### Let me know if you want something simpler (such as a vanilla RNN).

In [None]:
# Build the computation graph below

lstm_units = 64  # hidden units per direction in each LSTM layer

# Define inputs: one one-hot row per sequence position.
# NOTE(review): the last dimension must equal the one-hot width produced in
# section 2; this assumes vocab.csv also lists the padding symbol - TODO confirm.
inputs = tf.keras.Input(shape=[Tx, len(vocab)], dtype='float32', name='Input')

# Two stacked bidirectional LSTM layers, each followed by batch
# normalisation. Shapes are inferred from `inputs`, so no per-layer
# `input_shape` is given. `return_sequences=True` keeps one output vector
# per position, which the attention block below needs.
activations = layers.Bidirectional(
    layers.LSTM(
        units=lstm_units,
        kernel_regularizer=l2(1e-3),
        recurrent_regularizer=l2(1e-4),
        return_sequences=True,
        name='LSTM_1',
    ),
    name='Bidirectional_1',
)(inputs)
activations = layers.BatchNormalization(name='BN_1')(activations)
activations = layers.Bidirectional(
    layers.LSTM(
        units=lstm_units,
        kernel_regularizer=l2(1e-3),
        recurrent_regularizer=l2(1e-4),
        return_sequences=True,
        name='LSTM_2',
    ),
    name='Bidirectional_2',
)(activations)
activations = layers.BatchNormalization(name='BN_2')(activations)

# Attention layer: score each position with a single tanh unit, normalise
# the scores over the sequence with softmax, then repeat the weights so
# they can be multiplied element-wise with the LSTM output.
units = 2 * lstm_units  # forward + backward outputs of the bidirectional LSTM
attention = tf.keras.layers.Dense(1, activation='tanh', name='Attention_dense')(activations)
attention = tf.keras.layers.Flatten(name='Attention_flatten')(attention)
attention = tf.keras.layers.Activation('softmax', name='Attention_layer')(attention)
attention = tf.keras.layers.RepeatVector(units, name='Attention_repeat')(attention)
attention = tf.keras.layers.Permute([2, 1], name='Attention_permute')(attention)

# Combine attention weights and LSTM output: weight every position, then
# sum over the sequence axis to get one vector per sample.
attension_weights = tf.keras.layers.Multiply(name='Attention_apply')([activations, attention])
representation = tf.keras.layers.Lambda(lambda xin: tf.keras.backend.sum(xin, axis=-2), name='Attention_sum')(attension_weights)

# Fully connected head.
representation = layers.Dropout(0.1, name='Dropout_1')(representation)
representation = layers.Dense(units=64, activation='relu', name='FC_1')(representation)
representation = layers.BatchNormalization(name='BN_3')(representation)
representation = layers.Dropout(0.1, name='Dropout_2')(representation)

# Define output: softmax over the two classes.
probabilities = tf.keras.layers.Dense(2, activation='softmax', name='FC_2')(representation)

## 4. Compile and train the model
##### Save it to "saved_model"

In [None]:
# Create the model from the graph built in section 3.
model = tf.keras.models.Model(inputs, probabilities)

# Compile with the optimizer settings configured in section 2 rather than
# the string 'adam', which would silently ignore that configuration. A
# fresh copy is built from the config so that re-running this cell never
# reuses an optimizer instance that is already bound to an older model.
model.compile(
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=Adam.from_config(adam.get_config()),
)

# See the summary
model.summary()

# Train it. Note that the held-out test split doubles as validation data.
model.fit(trainX, trainY, validation_data=(testX, testY), epochs=nepoch, batch_size=nbatch, verbose=1)

# NOTE(review): newer Keras releases may require a '.keras' or '.h5' file
# extension here - confirm against the installed version.
model.save('saved_model/')

## 5. Load the model just trained

In [None]:
# Restore the trained network from the 'saved_model' directory on disk.
model = tf.keras.models.load_model(filepath='saved_model')

## 6. Application
##### For example, make a prediction -- (1)
##### Or inspect the attention weights -- (2)

In [None]:
#(1)
# Class probabilities for the held-out sequences.
predY = model.predict(testX)
# Take the most probable class per sample and compare it with the labels.
predicted_labels = predY.argmax(axis=1)
hit_rate = (predicted_labels == testY).mean()
print("testing accuracy = {:5f}".format(hit_rate))

In [None]:
#(2)

# Sub-model that maps the network input to the softmax attention weights.
# A sub-model is used instead of `tf.keras.backend.function`, which is not
# available in newer Keras releases.
attention_model = tf.keras.models.Model(
    inputs=model.input,
    outputs=model.get_layer('Attention_layer').output,
)

# Inspect a particular case, such as sequence no. 10141. The encoded
# sequence is taken straight from `vector`, so this works whether the
# sequence ended up in the training or the test split.
seq_no = 10141
if not 0 <= seq_no < len(vector):
    raise IndexError(
        "sequence no. {} is out of range for {} sequences".format(seq_no, len(vector))
    )
# Slice (rather than index) to keep the leading batch dimension.
weights = attention_model.predict(vector[seq_no:seq_no + 1], verbose=0)