In [1]:
import math
import numpy as np
#import h5py
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.python.framework import ops
#from tf_utils import load_dataset, random_mini_batches, convert_to_one_hot, predict

%matplotlib inline
np.random.seed(1)

In [2]:
X_train = np.loadtxt('X_train.csv', delimiter=',')
X_dev = np.loadtxt('X_dev.csv',delimiter=',')
X_test = np.loadtxt('X_test.csv',delimiter=',')
Y_train = np.loadtxt('Y_train.csv', delimiter=',')
Y_dev = np.loadtxt('Y_dev.csv', delimiter=',')
Y_test = np.loadtxt('Y_test.csv', delimiter=',')

In [3]:
Y_train = np.reshape(Y_train, (1, 5000))
Y_dev = np.reshape(Y_dev, (1, 1070))
Y_test = np.reshape(Y_test, (1, 1067))

In [4]:
print ("number of training examples = " + str(X_train.shape[1]))
print ("number of dev examples = " + str(X_dev.shape[1]))
print ("number of test examples = " + str(X_test.shape[1]))
print ("X_train shape: " + str(X_train.shape))
print ("Y_train shape: " + str(Y_train.shape))
print ("X_dev shape: " + str(X_dev.shape))
print ("Y_dev shape: " + str(Y_dev.shape))
print ("X_test shape: " + str(X_test.shape))
print ("Y_test shape: " + str(Y_test.shape))

number of training examples = 5000
number of dev examples = 1070
number of test examples = 1067
X_train shape: (939, 5000)
Y_train shape: (1, 5000)
X_dev shape: (939, 1070)
Y_dev shape: (1, 1070)
X_test shape: (939, 1067)
Y_test shape: (1, 1067)


In [5]:
# Let's just try with activate/no activate
Y_train = 1.0*(Y_train!=0)
Y_dev = 1.0*(Y_dev!=0)
Y_test = 1.0*(Y_test!=0)

In [6]:
def create_placeholders(n_x, n_y):
    """
    Creates the placeholders for the tensorflow session.
    
    Arguments:
    n_x -- scalar, size of a small molecule vector (939)
    n_y -- scalar = 1
    
    Returns:
    X -- placeholder for the data input, of shape [n_x, None] and dtype "float"
    Y -- placeholder for the input labels, of shape [n_y, None] and dtype "float"
 
    """

    X = tf.placeholder(tf.float32, shape = [n_x, None], name = "X")
    Y = tf.placeholder(tf.float32, shape = [n_y, None], name = "Y")
    
    return X, Y

In [7]:
def initialize_parameters(dims):
    """
    Initializes parameters to build a neural network with tensorflow. The shapes are:
                        Wk : [dims[k],dims[k-1]]
                        bk : [dims[k],1]
    
    Returns:
    parameters -- a dictionary of tensors containing W1, b1, ..., Wn, bn
    """
    tf.set_random_seed(1)  
    n = np.size(dims)

    assert dims[0]==939
    assert dims[n-1] == 1
    parameters = {}
    
    for kk in range(n-1):
        W = tf.get_variable("W"+str(kk+1), [dims[kk+1],dims[kk]], initializer = tf.contrib.layers.xavier_initializer(seed = 1))
        b = tf.get_variable("b"+str(kk+1), [dims[kk+1],1], initializer = tf.zeros_initializer())
        parameters["W"+str(kk+1)] = W
        parameters["b"+str(kk+1)] = b
    
    return parameters

In [8]:
def forward_propagation(X, parameters):
    """
    Implements the forward propagation for the model: LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SOFTMAX
    
    Arguments:
    X -- input dataset placeholder, of shape (input size, number of examples)
    parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3"
                  the shapes are given in initialize_parameters

    Returns:
    Z_final -- the output of the last LINEAR unit
    """
    
    n = len(parameters)/2
    
    if n>1:
        for kk in range(n-1):
            W_kk = parameters['W'+str(kk+1)]
            b_kk = parameters['b'+str(kk+1)]
            if kk == 0:
                Z = tf.add(tf.matmul(W_kk,X),b_kk)
            else:
                Z = tf.add(tf.matmul(W_kk,A),b_kk)
            A = tf.nn.relu(Z) 
        W_final = parameters['W'+str(n)]
        b_final = parameters['b'+str(n)]
        Z_final = tf.sigmoid(tf.add(tf.matmul(W_final,A),b_final))                               
    
    else:
        W_final = parameters['W'+str(n)]
        b_final = parameters['b'+str(n)]
        Z_final = tf.sigmoid(tf.add(tf.matmul(W_final,X),b_final))      
        
    return Z_final

In [9]:
def compute_cost(Z, Y):
    """
    Computes the cost
    
    Arguments:
    Z -- output of forward propagation (output of the last LINEAR unit), of shape (1, number of examples)
    Y -- "true" labels vector placeholder, same shape as Z
    
    Returns:
    cost - Tensor of the cost function
    """
    eps = 10e-3
    logits = tf.transpose(Z) 
    labels = tf.transpose(Y)
    
    #cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels = labels, logits = logits))
    #cost = tf.add(tf.reduce_mean(tf.nn.l2_loss(Z3-Y)), tf.reduce_mean(tf.nn.l2_loss(tf.multiply(Y,Z3-Y))))
    cost = -tf.reduce_mean(tf.add(scale*tf.multiply(labels, tf.log(logits+eps)), tf.multiply(1-labels, tf.log(1-logits+eps))))
    #cost = tf.reduce_mean(tf.multiply(labels, tf.log(logits+eps)))
    
    return cost

In [10]:
def model(X_train, Y_train, X_test, Y_test, dims, learning_rate = 0.01,
          num_epochs = 1000, print_cost = True):
    """
    Implements a three-layer tensorflow neural network: LINEAR->RELU->LINEAR->RELU->LINEAR->SOFTMAX.
    
    Arguments:
    X_train -- training set, of shape (input size = 12288, number of training examples = 1080)
    Y_train -- test set, of shape (output size = 6, number of training examples = 1080)
    X_test -- training set, of shape (input size = 12288, number of training examples = 120)
    Y_test -- test set, of shape (output size = 6, number of test examples = 120)
    learning_rate -- learning rate of the optimization
    num_epochs -- number of epochs of the optimization loop
    print_cost -- True to print the cost every 100 epochs
    
    Returns:
    parameters -- parameters learnt by the model. They can then be used to predict.
    """
    
    ops.reset_default_graph()                         # to be able to rerun the model without overwriting tf variables
    tf.set_random_seed(1)                             # to keep consistent results
    seed = 3                                          # to keep consistent results
    (n_x, m) = X_train.shape                          # (n_x: input size, m : number of examples in the train set)
    n_y = Y_train.shape[0]                            # n_y : output size
    costs = []                                        # To keep track of the cost
    
    # Create Placeholders of shape (n_x, n_y)
    X, Y = create_placeholders(n_x, n_y)
    

    # Initialize parameters
    parameters = initialize_parameters(dims)
    
    
    # Forward propagation: Build the forward propagation in the tensorflow graph
    Z_final = forward_propagation(X, parameters)
    
    
    # Cost function: Add cost function to tensorflow graph
    cost = compute_cost(Z_final, Y)
    
    
    # Backpropagation: Define the tensorflow optimizer. Use an AdamOptimizer.
    optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(cost)
    
    
    # Initialize all the variables
    init = tf.global_variables_initializer()

    # Start the session to compute the tensorflow graph
    with tf.Session() as sess:
        
        # Run the initialization
        sess.run(init)
        
        # Do the training loop
        for epoch in range(num_epochs):
            _ , epoch_cost = sess.run([optimizer, cost], feed_dict={X: X_train, Y: Y_train})

            # Print the cost every epoch
            if print_cost == True and epoch % 100 == 0:
                print ("Cost after epoch %i: %f" % (epoch, epoch_cost))
            if print_cost == True and epoch % 5 == 0:
                costs.append(epoch_cost)
                
        # plot the cost
        #plt.plot(np.squeeze(costs))
        #plt.ylabel('cost')
        #plt.xlabel('iterations (per tens)')
        #plt.title("Learning rate =" + str(learning_rate))
        #plt.show()

        # lets save the parameters in a variable
        parameters = sess.run(parameters)
        print ("Parameters have been trained!")
        
        # Calculate the correct predictions
        correct_prediction = tf.equal(tf.argmax(Z_final), tf.argmax(Y))

        # Calculate accuracy on the test set
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

        print ("Train Accuracy:", accuracy.eval({X: X_train, Y: Y_train}))
        print ("Test Accuracy:", accuracy.eval({X: X_test, Y: Y_test}))
        
        return parameters

In [None]:
"""
HYPERPARAMETER SEARCH
The hyperparameters we will explore in this model are:
    -- Number of layers (1-5)
    -- dimension of each layer
    -- scale value (upweights the cost function on the positives)
"""
Hyperparam_data = {}

# #Layers=1 (i.e. linear regression)

dims = [939, 1]
#scale_values = np.random.randint(1,9, size = 10)*np.power(10.0, np.random.randint(-1,3, size = 10))
scale_values = [90]
for ii in range(np.size(scale_values)):
    X, Y = create_placeholders(939, 1)
    print ("X = " + str(X))
    print ("Y = " + str(Y))
    scale = scale_values[ii]
    parameters = model(X_train, Y_train, X_test, Y_test, dims, 1, 5000)
    with tf.Session() as sess:
        X, Y = create_placeholders(939,1)
        Z_train = sess.run(forward_propagation(X, parameters), feed_dict={X: X_train})
        Z_dev = sess.run(forward_propagation(X, parameters), feed_dict={X: X_dev})
    P_train = Z_train >= 0.5
    P_dev = Z_dev>=0.5
    true_positive = (Y_dev*P_dev).sum()
    false_negative = (Y_dev*(1-P_dev)).sum()
    false_positive = ((1-Y_dev)*P_dev).sum()
    false_positive
    F1 = (2*true_positive)/(2*true_positive + false_negative + false_positive)
    print("F1 Score:", F1)
    Hyperparam_data['Test'+str(ii)] = {'scale': scale, 'n_layers': 1, 'F1': F1}
    
 

dim_values = [360]
scale_values = [120]
for ii in range(np.size(scale_values)):
    dims = [939, dim_values[ii],1]
    X, Y = create_placeholders(939, 1)
    print ("X = " + str(X))
    print ("Y = " + str(Y))
    scale = scale_values[ii]
    parameters = model(X_train, Y_train, X_test, Y_test, dims, 0.01, 2000)
    with tf.Session() as sess:
        X, Y = create_placeholders(939,1)
        Z_train = sess.run(forward_propagation(X, parameters), feed_dict={X: X_train})
        Z_dev = sess.run(forward_propagation(X, parameters), feed_dict={X: X_dev})
        Z_test = sess.run(forward_propagation(X, parameters), feed_dict={X: X_test})
    P_train = Z_train >= 0.5
    P_dev = Z_dev>=0.5
    P_test = Z_test>0.5
    true_positive = (Y_dev*P_dev).sum()
    false_negative = (Y_dev*(1-P_dev)).sum()
    false_positive = ((1-Y_dev)*P_dev).sum()
    F1_dev = (2*true_positive)/(2*true_positive + false_negative + false_positive)
    true_positive = (Y_test*P_test).sum()
    false_negative = (Y_test*(1-P_test)).sum()
    false_positive = ((1-Y_test)*P_test).sum()
    F1_test = (2*true_positive)/(2*true_positive + false_negative + false_positive)
    print("F1 Score Dev:", F1_dev)
    print("F1 Score Test:", F1_test)
    Hyperparam_data['NewTest'+2*str(ii)] = {'scale': scale, 'n_layers': 2, 'dim_value':dim_values[ii], 'F1_dev': F1_dev, 'F1_test': F1_test}

    


dim_values1 = [75, 450, 700]
scale_values = [5, 5,5]
for ii in range(np.size(scale_values)):
    dim_value1 = dim_values1[ii]
    dim_value2 = np.random.randint(1, dim_value1)
    dims = [939, dim_value1, dim_value2, 1]
    X, Y = create_placeholders(939, 1)
    print ("X = " + str(X))
    print ("Y = " + str(Y))
    scale = scale_values[ii]
    parameters = model(X_train, Y_train, X_test, Y_test, dims, 0.0001, 2000)
    with tf.Session() as sess:
        X, Y = create_placeholders(939,1)
        Z_train = sess.run(forward_propagation(X, parameters), feed_dict={X: X_train})
        Z_dev = sess.run(forward_propagation(X, parameters), feed_dict={X: X_dev})
        Z_test = sess.run(forward_propagation(X, parameters), feed_dict={X: X_test})
    P_train = Z_train >= 0.5
    P_dev = Z_dev>=0.5
    P_test = Z_test>0.5
    true_positive = (Y_dev*P_dev).sum()
    false_negative = (Y_dev*(1-P_dev)).sum()
    false_positive = ((1-Y_dev)*P_dev).sum()
    F1_dev = (2*true_positive)/(2*true_positive + false_negative + false_positive)
    true_positive = (Y_test*P_test).sum()
    false_negative = (Y_test*(1-P_test)).sum()
    false_positive = ((1-Y_test)*P_test).sum()
    F1_test = (2*true_positive)/(2*true_positive + false_negative + false_positive)
    print("F1 Score Dev:", F1_dev)
    print("F1 Score Test:", F1_test)

    Hyperparam_data['Test'+5*str(ii)] = {'scale': scale, 'n_layers': 3, 'dim_value1':dim_value1, 'dim_value2':dim_value2, 'F1_dev': F1_dev, 'F1_test':F1_test}


X = Tensor("X_2:0", shape=(939, ?), dtype=float32)
Y = Tensor("Y_2:0", shape=(1, ?), dtype=float32)
Cost after epoch 0: 7.251016
Cost after epoch 100: 1.471244
Cost after epoch 200: 1.194067
Cost after epoch 300: 1.038743
Cost after epoch 400: 0.929934
Cost after epoch 500: 0.835980
Cost after epoch 600: 0.756696
Cost after epoch 700: 0.693663
Cost after epoch 800: 0.630854
Cost after epoch 900: 0.575749
Cost after epoch 1000: 0.527550
Cost after epoch 1100: 0.489406
Cost after epoch 1200: 0.459536
Cost after epoch 1300: 0.429621
Cost after epoch 1400: 0.405958
Cost after epoch 1500: 0.385143
Cost after epoch 1600: 0.367081
Cost after epoch 1700: 0.351182


In [12]:
np.save('hyperparamdatscaling.npy', Hyperparam_data)