# DLNLP W4 Notebook 3
## Fully connected model for secondary structure (SS) prediction

In [1]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline

First we load the data:

In [2]:
# Location of the preprocessed dataset archive.
# NOTE(review): absolute, machine-specific path -- adjust to your local copy.
DATA_PATH = '/Users/frellsen/Dropbox/Share/Permanent/DLNLP2018/cullpdb+profile_6133_ss3-preprocessed.npz'

# np.load on an .npz returns a lazily-read NpzFile that holds an open file
# handle; the context manager closes it once the six arrays have been read
# into memory. After this block `data` is closed and must not be indexed again.
with np.load(DATA_PATH) as data:
    X_train = data['X_train']
    y_train = data['y_train']
    X_validation = data['X_validation']
    y_validation = data['y_validation']
    X_test = data['X_test']
    y_test = data['y_test']

Then we define the model/graph.

In [3]:
# Start from an empty default graph so this cell can be re-run safely:
# tf.get_variable refuses to create "W1", "b1", ... a second time in the
# same graph.
tf.reset_default_graph()

# Dimensions used by the placeholders and weight shapes below
seq_len = 700       # size of axis 1 of X and y
n_features = 44     # size of the last axis of X
n_classes = 4       # size of the last axis of y
noseq_class = 3     # label index that is treated as NoSeq and masked out

# Input and output
X = tf.placeholder(tf.float32, [None, seq_len, n_features], name="X")
y = tf.placeholder(tf.float32, [None, seq_len, n_classes], name='y')

# Define the model parameters
hidden = 100
W1 = tf.get_variable("W1", [seq_len * n_features, hidden], initializer=tf.contrib.layers.xavier_initializer())
b1 = tf.get_variable("b1", [hidden], initializer=tf.random_normal_initializer())
W2 = tf.get_variable("W2", [hidden, seq_len * n_classes], initializer=tf.contrib.layers.xavier_initializer())
b2 = tf.get_variable("b2", [seq_len * n_classes], initializer=tf.random_normal_initializer())

# Construct model: flatten each example, apply one ReLU hidden layer, then a
# linear layer whose output is reshaped back to [batch, seq_len, n_classes].
z1 = tf.nn.relu(tf.matmul(tf.layers.flatten(X), W1) + b1)
a2 = tf.reshape(tf.matmul(z1, W2) + b2, [-1, seq_len, n_classes])
y_ = tf.nn.softmax(a2)

# Then we mask out the NoSeq: keep only positions whose label is not
# `noseq_class`, so they contribute to neither the loss nor the accuracy.
mask = tf.not_equal(tf.argmax(y, 2), noseq_class)

# boolean_mask with a 2-D mask collapses the first two axes, leaving
# tensors of shape [n_kept_positions, n_classes].
y_masked = tf.boolean_mask(y, mask)
a2_masked = tf.boolean_mask(a2, mask)
y__masked = tf.boolean_mask(y_, mask)

# Define the loss function (computed on the logits, not on the softmax output)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_masked, logits=a2_masked))

# Define the optimizer operation; the learning rate is fed at run time
learning_rate = tf.placeholder(tf.float32)

#optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate).minimize(loss)
optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(loss)

# Variables for prediction and accuracy (over the unmasked positions only)
prediction = tf.argmax(y__masked, 1)
accuracy = tf.reduce_mean(tf.cast(tf.equal(prediction, tf.argmax(y_masked, 1)), tf.float32))

# Initialize the variables (they are assigned default values)
init = tf.global_variables_initializer()

# Total number of scalar entries across all trainable variables
n_parameters = np.sum([np.prod(v.get_shape().as_list()) for v in tf.trainable_variables()])
print("Number of parameters:", n_parameters)

Number of parameters: 3362900


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Then we train the model:

In [5]:
# Training hyper-parameters
batch_size = 100
n_epochs = 10
step_size = 0.0001   # value fed to the `learning_rate` placeholder
log_every = 10       # report validation accuracy every `log_every` mini-batches

# Start a session
with tf.Session() as session:

    # Run the initializer
    session.run(init)

    # Training cycle
    for epoch in range(n_epochs):
        print("Epoch:", epoch)
        # `b` is the offset of the mini-batch in X_train, `step` its index
        # within the epoch; the final mini-batch may be shorter than batch_size.
        for step, b in enumerate(range(0, X_train.shape[0], batch_size)):
            _, loss_value = session.run([optimizer, loss], feed_dict={X: X_train[b:b+batch_size],
                                                                      y: y_train[b:b+batch_size],
                                                                      learning_rate: step_size})

            # Count mini-batches instead of testing `b % 1000` so the logging
            # cadence does not depend on batch_size dividing 1000. With
            # batch_size = 100 this logs at exactly the same offsets as before.
            if step % log_every == 0:
                validation_accuracy = session.run(accuracy, feed_dict={X: X_validation, y: y_validation})
                print("loss[b=%04d] = %f, val_acc = %f" % (b, loss_value, validation_accuracy))

    print("Optimization done")

    # Calculate training accuracy (the whole training set is fed in one call)
    train_accuracy_value, pred_train = session.run([accuracy, prediction], feed_dict={X: X_train, y: y_train})
    print("Train accuracy:", train_accuracy_value)

    # Calculate test accuracy
    test_accuracy_value, pred_test = session.run([accuracy, prediction], feed_dict={X: X_test, y: y_test})
    print("Test accuracy:", test_accuracy_value)

Epoch: 0
loss[b=0000] = 1.802155, val_acc = 0.242607
loss[b=1000] = 1.712274, val_acc = 0.257421
loss[b=2000] = 1.583822, val_acc = 0.293727
loss[b=3000] = 1.439679, val_acc = 0.331930
loss[b=4000] = 1.289641, val_acc = 0.366794
loss[b=5000] = 1.178357, val_acc = 0.387773
Epoch: 1
loss[b=0000] = 1.142702, val_acc = 0.390884
loss[b=1000] = 1.122578, val_acc = 0.392686
loss[b=2000] = 1.096569, val_acc = 0.395816
loss[b=3000] = 1.094561, val_acc = 0.397390
loss[b=4000] = 1.068161, val_acc = 0.400027
loss[b=5000] = 1.061720, val_acc = 0.402170
Epoch: 2
loss[b=0000] = 1.061193, val_acc = 0.401677
loss[b=1000] = 1.075919, val_acc = 0.400804
loss[b=2000] = 1.068749, val_acc = 0.404427
loss[b=3000] = 1.074577, val_acc = 0.404560
loss[b=4000] = 1.055802, val_acc = 0.406191
loss[b=5000] = 1.051662, val_acc = 0.407747
Epoch: 3
loss[b=0000] = 1.052595, val_acc = 0.407728
loss[b=1000] = 1.068608, val_acc = 0.405660
loss[b=2000] = 1.062011, val_acc = 0.407880
loss[b=3000] = 1.067754, val_acc = 0.408