# TensorFlow Analysis

## Imports

In [1]:
##header ###
__author__ = "Jenhan Tao"
__license__ = "BSD"
__email__ = "jenhantao@gmail.com"

##imports ###
import sys
import os
import pandas as pd
import numpy as np
import matplotlib
import itertools
import scipy
import matplotlib.pyplot as plt 
import seaborn as sns
import pickle
import sklearn
import tensorflow as tf
from sklearn.cross_validation import StratifiedShuffleSplit
from IPython.display import clear_output, Image, display, HTML

##notebook specific configuration ###
%matplotlib inline
matplotlib.pylab.rcParams['savefig.dpi'] = 200
sys.setrecursionlimit(5000)
os.chdir('/gpfs/data01/glasslab/home/jtao/analysis/tensorflow_analysis/')
sns.set_context('notebook')
%load_ext autoreload
%autoreload 2



# Basic Tutorial

In [2]:
positive_features_frame = pd.read_pickle('/home/jtao/analysis/tensorflow_analysis/positive_features.pickle')

negative_features_frame = pd.read_pickle('/home/jtao/analysis/tensorflow_analysis/negative_features.pickle')

negative_features_frame = negative_features_frame.ix[:41508, :]

labels = np.array([[1,0] for x in range(41508)] + [[0,1] for x in range(41508)])
cv_labels = np.array([1 for x in range(41508)] + [0 for x in range(41508)])
features = pd.concat([positive_features_frame, negative_features_frame])

In [3]:
# TODO replace this code block with proper stratified cross validation to create training and test data sets

train_features = pd.concat([positive_features_frame.ix[:20754,:], negative_features_frame.ix[:20754,:]])
test_features = pd.concat([positive_features_frame.ix[20754:,:], negative_features_frame.ix[20754:,:]])

train_labels =  np.array([[1,0] for x in range(20754)] + [[0,1] for x in range(20754)])
test_labels =  np.array([[1,0] for x in range(20754)] + [[0,1] for x in range(20754)])

### Define Placeholder to hold features

In [111]:
tf.reset_default_graph()

In [112]:
x = tf.placeholder(tf.float32, [None, 196])

### Define Variables to hold weights and biases

In [113]:
W = tf.Variable(tf.zeros([196, 2]))
b = tf.Variable(tf.zeros([2]))

### Define model

In [114]:
y = tf.nn.softmax(tf.matmul(x, W) + b)

### Define Placeholder for True labels

In [115]:
# add a new placeholder to input the correct answers:
y_ = tf.placeholder(tf.float32, [None, 2])

### Define cross entropy

In [116]:
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), axis=1))

### Definine Optimization Method

In [117]:
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

### Initialize Variables

In [118]:
init = tf.global_variables_initializer()

### Launch Session

In [119]:
# launch Model in session
sess = tf.Session()
sess.run(init)

### Train Model in one batch

In [18]:
# alternatively you can just give the data to the model all at once
sess.run(train_step, feed_dict = {x:train_features, y_:train_labels})

In [19]:
# evaluate model performance
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))

accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

print(sess.run(accuracy, feed_dict={x: test_features , y_:test_labels}))


0.783546


### Train Model in batches

In [20]:
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
sss = StratifiedShuffleSplit(cv_labels, n_iter=1, test_size=0.2, random_state=0 )
for train_index, test_index in sss:
    train_features = features.ix[train_index,:]
    test_features = features.ix[test_index,:]

    train_labels = labels[train_index]
    test_labels = labels[test_index]

    inner_sss = StratifiedShuffleSplit(train_labels, n_iter=10, test_size=0.2, random_state=0 )
    for batch_index, nonbatch_index in inner_sss:
        batch_features = train_features.ix[batch_index,:]

        batch_labels = train_labels[batch_index]

        sess.run(train_step, feed_dict={x: batch_features, y_: batch_labels})
        print('accuracy',
        sess.run(accuracy, 
                 feed_dict={x: batch_features , 
                            y_:batch_labels}
                )
         )
    
    print('overall accuracy',
        sess.run(accuracy, 
                 feed_dict={x: test_features , 
                            y_:test_labels}
                )
         )



accuracy 0.817435
accuracy 0.830018
accuracy 0.840426
accuracy 0.846647
accuracy 0.851268
accuracy 0.852868
accuracy 0.855267
accuracy 0.857517
accuracy 0.858975
accuracy 0.85986
overall accuracy 0.862142


# Neural Network

In [124]:
tf.reset_default_graph()

sess = tf.Session()

In [125]:
# calculate number of features and labels
num_features = features.shape[1]
num_classes = labels.shape[1]

# place holder for holding input data
x = tf.placeholder(tf.float32, shape=[None, num_features])

# add a new placeholder to input the correct answers:
y_ = tf.placeholder(tf.float32, shape = [None, num_classes])

In [126]:
import math

In [127]:
# define layer 1
output_length_layer1 = 200
w_layer1 = tf.Variable(tf.truncated_normal([num_features, output_length_layer1], 
                                           stddev=1.0/math.sqrt(num_features)))
b_layer1 = tf.Variable(tf.zeros([output_length_layer1]))
z_layer1 = tf.matmul(x, w_layer1) + b_layer1
a_layer1 = tf.nn.softmax(z_layer1)

# drop out layer
keep_prob = tf.placeholder(tf.float32)
dropout_layer = tf.nn.dropout(a_layer1, keep_prob)

In [128]:
# define layer 2 the output layer
output_length_layer2 = num_classes
w_layer2 = tf.Variable(tf.truncated_normal([output_length_layer1, output_length_layer2], 
                       stddev=1.0/math.sqrt(output_length_layer1)))
b_layer2 = tf.Variable(tf.zeros([output_length_layer2]))
z_layer2 = tf.matmul(dropout_layer, w_layer2) + b_layer2
y = tf.nn.softmax(z_layer2)

In [132]:
# cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), axis=1))
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))
train_step = tf.train.AdamOptimizer().minimize(cross_entropy)

In [133]:
init = tf.global_variables_initializer()

sess.run(init)

In [138]:
# neural network training
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

sss = StratifiedShuffleSplit(cv_labels, n_iter=1, test_size=0.2, random_state=0 )

for train_index, test_index in sss:
    train_features = features.ix[train_index,:]
    test_features = features.ix[test_index,:]

    train_labels = labels[train_index]
    test_labels = labels[test_index]

    inner_sss = StratifiedShuffleSplit(train_labels, n_iter=200, test_size=0.95, random_state=0 )
    for batch_index, nonbatch_index in inner_sss:
        batch_features = train_features.ix[batch_index,:]

        batch_labels = train_labels[batch_index]

        sess.run(train_step, feed_dict={x: batch_features, 
                                        y_: batch_labels,
                                        keep_prob:0.5})
        print('current accuracy', sess.run(accuracy, 
                 feed_dict={x: batch_features , 
                            y_:batch_labels,
                           keep_prob:1.0}
                 )
          )
        
    print('accuracy',
    sess.run(accuracy, 
             feed_dict={x: test_features , 
                        y_:test_labels,
                        keep_prob:1.0}
             )
      )

current accuracy 0.88509
current accuracy 0.886295
current accuracy 0.888404
current accuracy 0.879066
current accuracy 0.89488
current accuracy 0.887349
current accuracy 0.882982
current accuracy 0.889006
current accuracy 0.888102
current accuracy 0.894729
current accuracy 0.892169
current accuracy 0.889759
current accuracy 0.887199
current accuracy 0.892169
current accuracy 0.893373
current accuracy 0.884639
current accuracy 0.890813
current accuracy 0.891867
current accuracy 0.891867
current accuracy 0.887199
current accuracy 0.883735
current accuracy 0.900602
current accuracy 0.882831
current accuracy 0.891416
current accuracy 0.889006
current accuracy 0.898193
current accuracy 0.88509
current accuracy 0.888404
current accuracy 0.893072
current accuracy 0.890361
current accuracy 0.889307
current accuracy 0.886295
current accuracy 0.891717
current accuracy 0.893825
current accuracy 0.892922
current accuracy 0.887952
current accuracy 0.890211
current accuracy 0.891265
current accurac

In [71]:
# without dropout
# define layer 1 for logistic regression
output_length_layer1 = num_classes
w_layer1 = tf.Variable(tf.zeros([num_features, output_length_layer1]))
b_layer1 = tf.Variable(tf.zeros([output_length_layer1]))
z_layer1 = tf.matmul(x, w_layer1) + b_layer1
y = tf.nn.softmax(z_layer1)

cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), axis=1))

train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

init = tf.global_variables_initializer()

sess.run(init)

correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
sss = StratifiedShuffleSplit(cv_labels, n_iter=1, test_size=0.2, random_state=0 )
for train_index, test_index in sss:
    train_features = features.ix[train_index,:]
    test_features = features.ix[test_index,:]

    train_labels = labels[train_index]
    test_labels = labels[test_index]

    inner_sss = StratifiedShuffleSplit(train_labels, n_iter=10, test_size=0.2, random_state=0 )
    for batch_index, nonbatch_index in inner_sss:
        batch_features = train_features.ix[batch_index,:]

        batch_labels = train_labels[batch_index]

        sess.run(train_step, feed_dict={x: batch_features, y_: batch_labels})
        print('accuracy',
        sess.run(accuracy, 
                 feed_dict={x: batch_features , 
                            y_:batch_labels}
                )
         )
    
    print('overall accuracy',
        sess.run(accuracy, 
                 feed_dict={x: test_features , 
                            y_:test_labels}
                )
         )




accuracy 0.781579
accuracy 0.816117
accuracy 0.830093
accuracy 0.841301
accuracy 0.847042
accuracy 0.850712
accuracy 0.852679
accuracy 0.855569
accuracy 0.858166
accuracy 0.858806
overall accuracy 0.860756


In [92]:
for f in os.listdir('./logs/'):
    os.remove('./logs/' + f)
file_writer = tf.summary.FileWriter('./logs/', sess.graph)

!tensorboard --logdir ./logs

Starting TensorBoard b'39' on port 6006
(You can navigate to http://169.228.63.221:6006)
^CTraceback (most recent call last):
  File "/gpfs/data01/glasslab/home/jtao/software/anaconda3/bin/tensorboard", line 11, in <module>
    sys.exit(main())
  File "/gpfs/data01/glasslab/home/jtao/software/anaconda3/lib/python3.5/site-packages/tensorflow/tensorboard/tensorboard.py", line 151, in main
    tb_server.serve_forever()
  File "/gpfs/data01/glasslab/home/jtao/software/anaconda3/lib/python3.5/socketserver.py", line 237, in serve_forever
    ready = selector.select(poll_interval)
  File "/gpfs/data01/glasslab/home/jtao/software/anaconda3/lib/python3.5/selectors.py", line 367, in select
    fd_event_list = self._poll.poll(timeout)
KeyboardInterrupt



# Zeyang's Code