In [1]:
from __future__ import print_function
import tensorflow as tf
import numpy as np
from sklearn.metrics import confusion_matrix

In [2]:
# Load the tab-separated feature matrices (X_*) and label vectors (Y_*).
def _read_tsv(filename):
    """Parse a tab-delimited text file into a NumPy array."""
    return np.loadtxt(filename, delimiter="\t")


# Training split: the oversampled files are used here; the plain
# "x_train_deep.txt" / "y_train_deep.txt" pair is the non-oversampled alternative.
X_train = _read_tsv("x_train_deep_over.txt")
Y_train = _read_tsv("y_train_deep_over.txt")

# Validation and test splits. Labels are read from the plain label files;
# "y_validation_deep_onehot.txt" / "y_test_onehot.txt" are the one-hot variants.
X_validation = _read_tsv("x_validation_deep.txt")
Y_validation = _read_tsv("y_validation_deep.txt")

X_test = _read_tsv("x_test.txt")
Y_test = _read_tsv("y_test.txt")

# Report the shape of every array that was loaded.
for caption, loaded in (
        ("X train dim: ", X_train),
        ("Y train dim: ", Y_train),
        ("X validation dim: ", X_validation),
        ("Y validation dim: ", Y_validation),
        ("X test dim: ", X_test),
        ("Y test dim: ", Y_test)):
    print(caption, loaded.shape)

X train dim:  (12778, 159)
Y train dim:  (12778,)
X validation dim:  (1694, 159)
Y validation dim:  (1694,)
X test dim:  (2117, 159)
Y test dim:  (2117,)


In [3]:
# Seed NumPy and TensorFlow (graph-level seed) so runs are reproducible.
random_state = 42
np.random.seed(random_state)
tf.set_random_seed(random_state)

# HyperParameters
training_epochs = 50
display_step = 2   # the running cost is printed every `display_step` epochs
batch_size = 64
learning_rate = 0.001
# NOTE(review): `keep_prob` is rebound to a tf.placeholder in the next cell, so
# this float is not what the graph reads; the training loop feeds 0.8 directly.
keep_prob = 0.8 #dropout

# Network Parameters
n_hidden_1 = 256 # 1st layer number of neurons
n_hidden_2 = 256 # 2nd layer number of neurons
#n_hidden_3 = 256
num_input = 159 # MACCS descriptors without 7 features
num_classes = 2 #  total classes (Inducer/not-Inducer)

# tf Graph input: a batch of feature rows and a column of 0/1 labels.
X = tf.placeholder(tf.float32, [None, num_input])
Y = tf.placeholder(tf.float32, [None, 1])

# Step counter incremented by the optimizer. It is bookkeeping, not a model
# parameter, so keep it out of the TRAINABLE_VARIABLES collection (which is
# the default var_list that Optimizer.minimize differentiates against).
global_step = tf.Variable(name='step', dtype=tf.int32, initial_value=0,
                          trainable=False)

In [4]:
# Parameters of the fully connected layers, drawn from tf.random_normal.
def _normal_variable(*dims):
    """Return a tf.Variable initialised by tf.random_normal with shape `dims`."""
    return tf.Variable(tf.random_normal(list(dims)))


# Creation order is kept as h1, h2, out, b1, b2, out.
# An optional third hidden layer ('h3' / 'b3') and a num_classes-wide output
# head are left disabled; the network emits a single logit.
weights = {}
weights['h1'] = _normal_variable(num_input, n_hidden_1)
weights['h2'] = _normal_variable(n_hidden_1, n_hidden_2)
weights['out'] = _normal_variable(n_hidden_2, 1)

biases = {}
biases['b1'] = _normal_variable(n_hidden_1)
biases['b2'] = _normal_variable(n_hidden_2)
biases['out'] = _normal_variable(1)


# Dropout keep-probability, supplied through feed_dict at run time.
keep_prob = tf.placeholder(tf.float32)
# Small epsilon value for the BN transform
# (not referenced by the cells visible in this notebook).
epsilon = 1e-3

In [5]:
#two-hidden-layer network with batch normalization and dropout
def neural_net(x, weights, biases, keep_prob, train, reuse):
    """Build the network graph and return its output logits.

    Each hidden block is: matmul with the layer's weight matrix, batch
    normalization with a ReLU activation, then dropout. The output layer is
    a plain affine map with no activation.

    Args:
        x: input tensor fed to the first matmul.
        weights: mapping with keys 'h1', 'h2' and 'out'.
        biases: mapping; only the 'out' entry is read (the hidden layers add
            no explicit bias before batch normalization).
        keep_prob: keep probability passed to tf.nn.dropout.
        train: forwarded to batch_norm as `is_training`.
        reuse: forwarded to the 'Neural_Net' variable scope.

    Returns:
        The output-layer tensor (logits).
    """
    with tf.variable_scope('Neural_Net', reuse=reuse):
        hidden = x
        for weight_key in ('h1', 'h2'):
            pre_activation = tf.matmul(hidden, weights[weight_key])
            normalised = tf.contrib.layers.batch_norm(
                inputs=pre_activation,
                is_training=train,
                activation_fn=tf.nn.relu)
            hidden = tf.nn.dropout(normalised, keep_prob)  # keep prob for dropout

        out_layer = tf.matmul(hidden, weights['out']) + biases['out']
    return out_layer

In [6]:
# Build the model twice over the same placeholders: once with batch norm in
# training mode, once (reusing the 'Neural_Net' scope) in inference mode.
predictions_train = neural_net(X, weights, biases, keep_prob, train=True, reuse=None)
predictions_val = neural_net(X, weights, biases, keep_prob, train=False, reuse=True)


def _mean_sigmoid_xent(logits):
    """Mean sigmoid cross-entropy of `logits` against the label placeholder Y."""
    per_example = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=Y)
    return tf.reduce_mean(per_example)


def _thresholded_accuracy(logits):
    """Return (probabilities, 0/1 predictions, per-example hits, mean accuracy)."""
    prob = tf.nn.sigmoid(logits)
    hard = tf.cast(prob > 0.5, dtype=tf.float32)
    hits = tf.cast(tf.equal(hard, Y), tf.float32)
    return prob, hard, hits, tf.reduce_mean(hits)


# LOSS: sigmoid cross-entropy on the single output logit.
cost_train = _mean_sigmoid_xent(predictions_train)
cost_val = _mean_sigmoid_xent(predictions_val)

loss_train_sum = tf.summary.scalar('train_loss', cost_train)
loss_val_sum = tf.summary.scalar('val_loss', cost_val)

# ACCURACY: probabilities above 0.5 count as the positive class.
(pred_prob_tr, pred_bin_tr,
 correct_prediction_tr, accuracy_train) = _thresholded_accuracy(predictions_train)
(pred_prob_val, pred_bin_val,
 correct_prediction_val, accuracy_val) = _thresholded_accuracy(predictions_val)

acc_tr_sum = tf.summary.scalar('train_acc', accuracy_train)
acc_val_sum = tf.summary.scalar('val_acc', accuracy_val)

# STREAMING METRICS: each tf.metrics call returns (value, update_op). The
# summaries below are attached to the update ops, so evaluating a summary
# also accumulates the current batch into the metric's local variables.

# AUC is computed from probabilities.
auc_tr, auc_tr_opt = tf.metrics.auc(labels=Y, predictions=pred_prob_tr, name='auc_tr')
auc_val, auc_val_opt = tf.metrics.auc(labels=Y, predictions=pred_prob_val, name='auc_val')

auc_tr_sum = tf.summary.scalar('train_auc', auc_tr_opt)
auc_val_sum = tf.summary.scalar('val_auc', auc_val_opt)

# PRECISION is computed from thresholded predictions.
precision_tr, precision_tr_opt = tf.metrics.precision(
    labels=Y, predictions=pred_bin_tr, name='precision_tr')
precision_val, precision_val_opt = tf.metrics.precision(
    labels=Y, predictions=pred_bin_val, name='precision_val')

precision_tr_sum = tf.summary.scalar('train_precision', precision_tr_opt)
precision_val_sum = tf.summary.scalar('val_precision', precision_val_opt)

# RECALL is computed from thresholded predictions.
recall_tr, recall_tr_opt = tf.metrics.recall(
    labels=Y, predictions=pred_bin_tr, name='recall_tr')
recall_val, recall_val_opt = tf.metrics.recall(
    labels=Y, predictions=pred_bin_val, name='recall_val')

recall_tr_sum = tf.summary.scalar('train_recall', recall_tr_opt)
recall_val_sum = tf.summary.scalar('val_recall', recall_val_opt)

# One merged summary op per split.
summary_ops_tr = tf.summary.merge(
    [acc_tr_sum, loss_train_sum, auc_tr_sum, precision_tr_sum, recall_tr_sum])
summary_ops_val = tf.summary.merge(
    [acc_val_sum, loss_val_sum, auc_val_sum, precision_val_sum, recall_val_sum])

# Ops registered in the UPDATE_OPS collection of the default graph.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

# Event files for TensorBoard are written under this directory.
logdir = '/home/german/ESC_RNA_seq/pathway_enrichment_analysis/machine_learning/deep_learning/tf_event/'
writer = tf.summary.FileWriter(logdir)

# Make the training op depend on the collected update ops so that they run
# with every optimisation step; minimize() also increments global_step.
with tf.control_dependencies(update_ops):
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(
        cost_train, global_step=global_step)

In [7]:
#TRAINING AND EVALUATION
# The label placeholder Y is [None, 1], so labels are fed as a column vector.
Y_validation = np.reshape(Y_validation, (-1,1))

# Dropout keep-probabilities fed to the `keep_prob` placeholder.
TRAIN_KEEP_PROB = 0.8
EVAL_KEEP_PROB = 1.0

# Build the local-variable reset op ONCE. The deprecated
# tf.initialize_local_variables() was previously called inside the epoch
# loop, which added a fresh initializer op to the graph on every epoch.
reset_local_vars = tf.local_variables_initializer()

# np.array_split is deterministic, so the mini-batches are the same every
# epoch; compute them once instead of once per epoch. max(1, ...) keeps
# array_split valid when there are fewer samples than batch_size.
# NOTE(review): batches are taken in file order and are never reshuffled.
total_batch = max(1, len(X_train) // batch_size)
x_batches = np.array_split(X_train, total_batch)
y_batches = np.array_split(Y_train, total_batch)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(reset_local_vars)

    for epoch in range(training_epochs):
        avg_cost = 0.0
        for batch_x, batch_y in zip(x_batches, y_batches):
            batch_y = np.reshape(batch_y, (-1,1))

            # One optimisation step; the merged summary also advances the
            # streaming train metrics with this batch.
            _, train_sum, c, global_step_o = sess.run(
                [optimizer, summary_ops_tr, cost_train, global_step],
                feed_dict={X: batch_x,
                           Y: batch_y,
                           keep_prob: TRAIN_KEEP_PROB})
            writer.add_summary(train_sum, global_step_o)
            avg_cost += c / total_batch

        # Clear the streaming-metric accumulators so the validation metrics
        # reflect this pass only (this also resets the train accumulators).
        sess.run(reset_local_vars)
        val_sum, c_val = sess.run(
            [summary_ops_val, cost_val],
            feed_dict={X: X_validation,
                       Y: Y_validation,
                       keep_prob: EVAL_KEEP_PROB})

        # Validation summaries are indexed by epoch, train summaries by step.
        writer.add_summary(val_sum, epoch)

        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "cost=", \
                "{:.9f}".format(avg_cost))

    print("Optimization Finished!")

    # Push any buffered events to disk; the writer is otherwise never flushed.
    writer.flush()

    #FOR CONFUSION MATRIX
    # Thresholded validation predictions, read by the confusion-matrix cell.
    x = pred_bin_val.eval({X: X_validation, Y: Y_validation, keep_prob: EVAL_KEEP_PROB})

Instructions for updating:
Use `tf.local_variables_initializer` instead.
Epoch: 0001 cost= 3.721027816
Epoch: 0003 cost= 2.841066736
Epoch: 0005 cost= 2.384701459
Epoch: 0007 cost= 2.155918975
Epoch: 0009 cost= 1.896588268
Epoch: 0011 cost= 1.665088304
Epoch: 0013 cost= 1.550280616
Epoch: 0015 cost= 1.405730387
Epoch: 0017 cost= 1.248929445
Epoch: 0019 cost= 1.069665330
Epoch: 0021 cost= 0.960680990
Epoch: 0023 cost= 0.836676181
Epoch: 0025 cost= 0.728346195
Epoch: 0027 cost= 0.639900695
Epoch: 0029 cost= 0.551424410
Epoch: 0031 cost= 0.488612004
Epoch: 0033 cost= 0.425053754
Epoch: 0035 cost= 0.378174001
Epoch: 0037 cost= 0.327302839
Epoch: 0039 cost= 0.300845575
Epoch: 0041 cost= 0.256918131
Epoch: 0043 cost= 0.239602227
Epoch: 0045 cost= 0.197784731
Epoch: 0047 cost= 0.177083585
Epoch: 0049 cost= 0.180875312
Optimization Finished!


In [8]:
#EXAMPLE OF CONFUSION MATRIX IN TENSORFLOW
y =  [1, 1, 1, 1, 1, 0, 0, 0]   # true labels
y_ = [1, 1, 1, 0, 0, 1, 0, 1]   # predicted labels

con = tf.confusion_matrix(labels=y, predictions=y_ )
# Use the session itself as the context manager so it is closed on exit;
# `sess.as_default()` only installs the default session and does not close it.
with tf.Session() as sess:
    print(sess.run(con)) #ROWS ARE TRUE LABELS, COLUMNS ARE PREDICTIONS

[[1 2]
 [2 3]]


In [9]:
#CONFUSION MATRIX
# Flatten predictions and labels into single rows. Using -1 lets NumPy infer
# the length instead of hard-coding the validation-set size.
xr = x.reshape(1, -1)
yv = Y_validation.reshape(1, -1)

con = tf.confusion_matrix(labels=yv.tolist()[0], predictions=xr.tolist()[0])
# Use the session itself as the context manager so it is closed on exit;
# `sess.as_default()` only installs the default session and does not close it.
with tf.Session() as sess:
    print(sess.run(con)) #ROWS ARE TRUE LABELS, COLUMNS ARE PREDICTIONS

[[1518   91]
 [  67   18]]


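Metrics computed from the validation confusion matrix above (TN = 1518, FP = 91, FN = 67, TP = 18):

ACCURACY = (TP + TN) / total:
0.9067
SENSITIVITY = TP / (TP + FN):
0.2118
SPECIFICITY = TN / (TN + FP):
0.9434
PRECISION = TP / (TP + FP):
0.1651
F1 = 2TP / (2TP + FP + FN):
0.1856

Note: the values recorded here previously (sensitivity 0.364, specificity 0.9353, precision 0.2296, F1 0.2818) do not follow from the matrix shown above and presumably come from a different run; only the accuracy agrees.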