# Lab: Machine Learning

## Setup

In [1]:
import tensorflow as tf
import numpy as np
tf.set_random_seed(777)  # for reproducibility

## Load Data

In [2]:
def read_training_data(fname, D=None):
    file = open(fname)
    params = ["radius", "texture", "perimeter","area","smoothness","compactness","concavity","concave points","symmetry","fractal dimension"];
    stats = ["(mean)", "(stderr)", "(worst)"]
    feature_labels = set([y+x for x in stats for y in params])
    feature_map = {params[i]+stats[j]:j*len(params)+i for i in range(len(params)) for j in range(len(stats))}
    
    patient_ids = []
    feature_vectors = []
    patient_diagnoses = []
    for line in file:
        row = line.split(",")
        patient_ID = int(row[0])
        patient_ids.append(patient_ID)
        
        patient_diagnoses.append([0 if row[1]=='B' else +1])
        feature_vectors.append([float(row[feature_map[key]+2]) for key in feature_labels])
    return patient_ids, feature_labels, np.array(feature_vectors, dtype=np.float32), np.array(patient_diagnoses, dtype=np.float32)

In [3]:
ids_train, labels_train, feature_vectors_train, patient_diagnoses_train = read_training_data('train.data')

In [4]:
print("label: ", labels_train)
print("feature_vectors: ", feature_vectors_train)
print("patient_diagnoses: ", patient_diagnoses_train)

label:  {'radius(worst)', 'smoothness(mean)', 'symmetry(stderr)', 'area(stderr)', 'concavity(mean)', 'texture(stderr)', 'area(mean)', 'compactness(mean)', 'fractal dimension(stderr)', 'symmetry(worst)', 'compactness(worst)', 'perimeter(mean)', 'fractal dimension(worst)', 'smoothness(worst)', 'smoothness(stderr)', 'perimeter(worst)', 'texture(worst)', 'concavity(worst)', 'concavity(stderr)', 'radius(stderr)', 'concave points(worst)', 'texture(mean)', 'concave points(stderr)', 'symmetry(mean)', 'area(worst)', 'radius(mean)', 'fractal dimension(mean)', 'perimeter(stderr)', 'concave points(mean)', 'compactness(stderr)'}
feature_vectors:  [[  2.53799992e+01   1.18400000e-01   3.00299991e-02 ...,   8.58899975e+00
    1.47100002e-01   4.90400009e-02]
 [  2.49899998e+01   8.47399980e-02   1.38900001e-02 ...,   3.39800000e+00
    7.01700002e-02   1.30799999e-02]
 [  2.35699997e+01   1.09600000e-01   2.25000009e-02 ...,   4.58500004e+00
    1.27900004e-01   4.00599986e-02]
 ..., 
 [  1.33599997e

In [5]:
x_data_train = feature_vectors_train
y_data_train = patient_diagnoses_train
print(x_data_train.shape, y_data_train.shape)
print(x_data_train, y_data_train)

(300, 30) (300, 1)
[[  2.53799992e+01   1.18400000e-01   3.00299991e-02 ...,   8.58899975e+00
    1.47100002e-01   4.90400009e-02]
 [  2.49899998e+01   8.47399980e-02   1.38900001e-02 ...,   3.39800000e+00
    7.01700002e-02   1.30799999e-02]
 [  2.35699997e+01   1.09600000e-01   2.25000009e-02 ...,   4.58500004e+00
    1.27900004e-01   4.00599986e-02]
 ..., 
 [  1.33599997e+01   9.96799991e-02   1.57500003e-02 ...,   4.13800001e+00
    3.51499990e-02   1.00499997e-02]
 [  1.62199993e+01   6.57600015e-02   1.10299997e-02 ...,   1.66100001e+00
    1.37400003e-02   1.37700001e-02]
 [  1.09300003e+01   1.01499997e-01   3.46399993e-02 ...,   2.28900003e+00
    1.87500007e-02   1.44300004e-02]] [[ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 0.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]


In [6]:
ids_test, labels_test, feature_vectors_test, patient_diagnoses_test = read_training_data('validate.data')

In [7]:
x_data_test = feature_vectors_test
y_data_test = patient_diagnoses_test
print(x_data_test.shape, y_data_test.shape)
print(x_data_test, y_data_test)

(260, 30) (260, 1)
[[  2.59300003e+01   1.15000002e-01   1.88400000e-02 ...,   7.23699999e+00
    1.06200002e-01   3.20300013e-02]
 [  1.34600000e+01   8.45099986e-02   2.74000000e-02 ...,   2.57900000e+00
    3.09900008e-02   3.36900018e-02]
 [  2.36800003e+01   1.08000003e-01   2.73599997e-02 ...,   7.80399990e+00
    1.28000006e-01   4.73200008e-02]
 ..., 
 [  1.04899998e+01   8.12299997e-02   3.00399996e-02 ...,   3.61800003e+00
    0.00000000e+00   1.12399999e-02]
 [  1.54799995e+01   8.47299993e-02   1.63800009e-02 ...,   2.22399998e+00
    3.73600014e-02   4.63900007e-02]
 [  1.24799995e+01   9.26100016e-02   1.48799997e-02 ...,   1.93599999e+00
    4.10499983e-02   2.98200008e-02]] [[ 1.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 0.]


## Non normalized

In [8]:
# placeholders for a tensor that will be always fed.
X = tf.placeholder(tf.float32, shape=[None, 30])
Y = tf.placeholder(tf.float32, shape=[None, 1])

W = tf.Variable(tf.random_normal([30, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Hypothesis using sigmoid: tf.div(1., 1. + tf.exp(tf.matmul(X, W)))
hypothesis = tf.sigmoid(tf.matmul(X, W) + b)

# cost/loss function
cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) * tf.log(1 - hypothesis))

train = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(cost)

# Accuracy computation
# True if hypothesis>0.5 else False
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

# Launch graph
with tf.Session() as sess:
    # Initialize TensorFlow variables
    sess.run(tf.global_variables_initializer())

    for step in range(10001):
        cost_val, _ = sess.run([cost, train], feed_dict={X: x_data_train, Y: y_data_train})
        if step % 1000 == 0:
            print(step, cost_val)

    # Accuracy report
    a = sess.run(accuracy, feed_dict={X: x_data_train, Y: y_data_train})
    print("Train Accuracy: ", a)
    a = sess.run(accuracy, feed_dict={X: x_data_test, Y: y_data_test})
    print("Test Accuracy: ", a)

0 nan
1000 nan
2000 nan
3000 nan
4000 nan
5000 nan
6000 nan
7000 nan
8000 nan
9000 nan
10000 nan
Train Accuracy:  0.513333
Test Accuracy:  0.769231


## Normalized

In [9]:
def MinMaxScaler(data):
    numerator = data - np.min(data, 0)
    denominator = np.max(data, 0) - np.min(data, 0)
    # noise term prevents the zero division
    return numerator / (denominator + 1e-7)

In [10]:
x_data_train = MinMaxScaler(feature_vectors_train)
y_data_train = patient_diagnoses_train
print(x_data_train.shape, y_data_train.shape)
print(x_data_train, y_data_train)

(300, 30) (300, 1)
[[ 0.6927352   0.68000889  0.31164473 ...,  0.3690336   0.73111296
   0.35139817]
 [ 0.67725289  0.2704705   0.08453863 ...,  0.12444047  0.3487573
   0.08132298]
 [ 0.62088132  0.57294005  0.20569004 ...,  0.18037036  0.63568562
   0.28395444]
 ..., 
 [ 0.21556173  0.45224422  0.11071072 ...,  0.1593083   0.17470171
   0.05856636]
 [ 0.32909882  0.03954251  0.04429554 ...,  0.0425953   0.06829023
   0.08650517]
 [ 0.1190949   0.474388    0.37651211 ...,  0.07218584  0.09319081
   0.09146206]] [[ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 0.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 1.]
 [ 0.]
 [ 1.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 1.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 0.]

In [11]:
x_data_test = MinMaxScaler(feature_vectors_test)
y_data_test = patient_diagnoses_test
print(x_data_test.shape, y_data_test.shape)
print(x_data_test, y_data_test)

(260, 30) (260, 1)
[[ 0.63050944  0.50241554  0.16708978 ...,  0.36163908  0.55514866
   0.39524317]
 [ 0.17476791  0.18895839  0.33437499 ...,  0.10110412  0.16199678
   0.41785342]
 [ 0.54827863  0.43045098  0.33359325 ...,  0.39335296  0.66910577
   0.60350245]
 ..., 
 [ 0.0662232   0.15523787  0.38596758 ...,  0.1592183   0.          0.11207046]
 [ 0.24859291  0.19122015  0.11901484 ...,  0.08124798  0.19529524
   0.59083527]
 [ 0.13895179  0.2722317   0.08970083 ...,  0.06513933  0.21458429
   0.3651416 ]] [[ 1.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 1.]
 [ 0.]
 [ 1.]


In [12]:
# placeholders for a tensor that will be always fed.
X = tf.placeholder(tf.float32, shape=[None, 30])
Y = tf.placeholder(tf.float32, shape=[None, 1])

W = tf.Variable(tf.random_normal([30, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Hypothesis using sigmoid: tf.div(1., 1. + tf.exp(tf.matmul(X, W)))
hypothesis = tf.sigmoid(tf.matmul(X, W) + b)

# cost/loss function
cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) * tf.log(1 - hypothesis))

train = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(cost)

# Accuracy computation
# True if hypothesis>0.5 else False
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

# Launch graph
with tf.Session() as sess:
    # Initialize TensorFlow variables
    sess.run(tf.global_variables_initializer())

    for step in range(10001):
        cost_val, _ = sess.run([cost, train], feed_dict={X: x_data_train, Y: y_data_train})
        if step % 1000 == 0:
            print(step, cost_val)

    # Accuracy report
    a = sess.run(accuracy, feed_dict={X: x_data_train, Y: y_data_train})
    print("Train Accuracy: ", a)
    a = sess.run(accuracy, feed_dict={X: x_data_test, Y: y_data_test})
    print("Test Accuracy: ", a)

0 0.733251
1000 0.450161
2000 0.38207
3000 0.338966
4000 0.308978
5000 0.286717
6000 0.269407
7000 0.255471
8000 0.243947
9000 0.234213
10000 0.225848
Train Accuracy:  0.943333
Test Accuracy:  0.95


## Multi Layers(NN)

In [13]:
# placeholders for a tensor that will be always fed.
X = tf.placeholder(tf.float32, shape=[None, 30])
Y = tf.placeholder(tf.float32, shape=[None, 1])

W1 = tf.Variable(tf.random_normal([30, 256]), name='weight1')
b1 = tf.Variable(tf.random_normal([256]), name='bias1')
layer1 = tf.sigmoid(tf.matmul(X, W1) + b1)

W2 = tf.Variable(tf.random_normal([256, 256]), name='weight2')
b2 = tf.Variable(tf.random_normal([256]), name='bias2')
layer2 = tf.sigmoid(tf.matmul(layer1, W2) + b2)

W3 = tf.Variable(tf.random_normal([256, 256]), name='weight3')
b3 = tf.Variable(tf.random_normal([256]), name='bias3')
layer3 = tf.sigmoid(tf.matmul(layer2, W3) + b3)

W4 = tf.Variable(tf.random_normal([256, 1]), name='weight4')
b4 = tf.Variable(tf.random_normal([1]), name='bias4')
hypothesis = tf.sigmoid(tf.matmul(layer3, W4) + b4)


# cost/loss function
cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) * tf.log(1 - hypothesis))

train = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(cost)

# Accuracy computation
# True if hypothesis>0.5 else False
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

# Launch graph
with tf.Session() as sess:
    # Initialize TensorFlow variables
    sess.run(tf.global_variables_initializer())

    for step in range(10001):
        cost_val, _ = sess.run([cost, train], feed_dict={X: x_data_train, Y: y_data_train})
        if step % 1000 == 0:
            print(step, cost_val)

    # Accuracy report
    a = sess.run(accuracy, feed_dict={X: x_data_train, Y: y_data_train})
    print("Train Accuracy: ", a)
    a = sess.run(accuracy, feed_dict={X: x_data_test, Y: y_data_test})
    print("Test Accuracy: ", a)

0 4.77009
1000 0.0755213
2000 0.0573293
3000 0.0484966
4000 0.0425908
5000 0.0382573
6000 0.0347765
7000 0.0318223
8000 0.0292361
9000 0.0269647
10000 0.0250393
Train Accuracy:  0.993333
Test Accuracy:  0.976923


## ReLu & Xavier

In [14]:
# placeholders for a tensor that will be always fed.
X = tf.placeholder(tf.float32, shape=[None, 30])
Y = tf.placeholder(tf.float32, shape=[None, 1])

# W1 = tf.Variable(tf.random_normal([30, 256]), name='weight1')
W1 = tf.get_variable("rx_weight1", shape=[30, 256],
                     initializer=tf.contrib.layers.xavier_initializer(), )
b1 = tf.Variable(tf.random_normal([256]), name='bias1')
layer1 = tf.nn.relu(tf.matmul(X, W1) + b1)

# W2 = tf.Variable(tf.random_normal([256, 256]), name='weight2')
W2 = tf.get_variable("rx_weight2", shape=[256, 256],
                     initializer=tf.contrib.layers.xavier_initializer())
b2 = tf.Variable(tf.random_normal([256]), name='bias2')
layer2 = tf.nn.relu(tf.matmul(layer1, W2) + b2)

# W3 = tf.Variable(tf.random_normal([256, 256]), name='weight3')
W3 = tf.get_variable("rx_weight3", shape=[256, 256],
                     initializer=tf.contrib.layers.xavier_initializer())
b3 = tf.Variable(tf.random_normal([256]), name='bias3')
layer3 = tf.nn.relu(tf.matmul(layer2, W3) + b3)

# W4 = tf.Variable(tf.random_normal([256, 1]), name='weight4')
W4 = tf.get_variable("rx_weight4", shape=[256, 1],
                     initializer=tf.contrib.layers.xavier_initializer())
b4 = tf.Variable(tf.random_normal([1]), name='bias4')
hypothesis = tf.sigmoid(tf.matmul(layer3, W4) + b4)


# cost/loss function
cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) * tf.log(1 - hypothesis))

train = tf.train.GradientDescentOptimizer(learning_rate=0.001).minimize(cost)

# Accuracy computation
# True if hypothesis>0.5 else False
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

# Launch graph
with tf.Session() as sess:
    # Initialize TensorFlow variables
    sess.run(tf.global_variables_initializer())

    for step in range(10001):
        cost_val, _ = sess.run([cost, train], feed_dict={X: x_data_train, Y: y_data_train})
        if step % 1000 == 0:
            print(step, cost_val)

    # Accuracy report
    a = sess.run(accuracy, feed_dict={X: x_data_train, Y: y_data_train})
    print("Train Accuracy: ", a)
    a = sess.run(accuracy, feed_dict={X: x_data_test, Y: y_data_test})
    print("Test Accuracy: ", a)

0 1.58347
1000 0.609743
2000 0.502468
3000 0.367882
4000 0.266396
5000 0.209192
6000 0.176648
7000 0.155429
8000 0.140629
9000 0.129908
10000 0.121867
Train Accuracy:  0.96
Test Accuracy:  0.969231


* Xavier가 없거나 learning rate가 조정되지 않으면, ReLu를 적용해도 동작하지 않았음

## Dropout

In [15]:
# placeholders for a tensor that will be always fed.
X = tf.placeholder(tf.float32, shape=[None, 30])
Y = tf.placeholder(tf.float32, shape=[None, 1])

# dropout (keep_prob) rate  0.7 on training, but should be 1 for testing
keep_prob = tf.placeholder(tf.float32)

W1 = tf.get_variable("d_weight1", shape=[30, 256],
                     initializer=tf.contrib.layers.xavier_initializer())
b1 = tf.Variable(tf.random_normal([256]), name='bias1')
layer1 = tf.nn.relu(tf.matmul(X, W1) + b1)
layer1 = tf.nn.dropout(layer1, keep_prob=keep_prob)

W2 = tf.get_variable("d_weight2", shape=[256, 256],
                     initializer=tf.contrib.layers.xavier_initializer())
b2 = tf.Variable(tf.random_normal([256]), name='bias2')
layer2 = tf.nn.relu(tf.matmul(layer1, W2) + b2)
layer2 = tf.nn.dropout(layer2, keep_prob=keep_prob)

W3 = tf.get_variable("d_weight3", shape=[256, 256],
                     initializer=tf.contrib.layers.xavier_initializer())
b3 = tf.Variable(tf.random_normal([256]), name='bias3')
layer3 = tf.nn.relu(tf.matmul(layer2, W3) + b3)
layer3 = tf.nn.dropout(layer3, keep_prob=keep_prob)

W4 = tf.get_variable("d_weight4", shape=[256, 1],
                     initializer=tf.contrib.layers.xavier_initializer())
b4 = tf.Variable(tf.random_normal([1]), name='bias4')
hypothesis = tf.sigmoid(tf.matmul(layer3, W4) + b4)


# cost/loss function
cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) * tf.log(1 - hypothesis))

train = tf.train.GradientDescentOptimizer(learning_rate=0.001).minimize(cost)

# Accuracy computation
# True if hypothesis>0.5 else False
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

# Launch graph
with tf.Session() as sess:
    # Initialize TensorFlow variables
    sess.run(tf.global_variables_initializer())

    for step in range(10001):
        cost_val, _ = sess.run([cost, train], feed_dict={X: x_data_train, Y: y_data_train, keep_prob: 0.7})
        if step % 1000 == 0:
            print(step, cost_val)

    # Accuracy report
    a = sess.run(accuracy, feed_dict={X: x_data_train, Y: y_data_train, keep_prob: 1})
    print("Train Accuracy: ", a)
    a = sess.run(accuracy, feed_dict={X: x_data_test, Y: y_data_test, keep_prob: 1})
    print("Test Accuracy: ", a)

0 0.846847
1000 0.746119
2000 0.697932
3000 0.691313
4000 0.640065
5000 0.587214
6000 0.578331
7000 0.507666
8000 0.47365
9000 0.429216
10000 0.384667
Train Accuracy:  0.92
Test Accuracy:  0.961538


## Dropout without ReLu & Xavier

In [16]:
# placeholders for a tensor that will be always fed.
X = tf.placeholder(tf.float32, shape=[None, 30])
Y = tf.placeholder(tf.float32, shape=[None, 1])

# dropout (keep_prob) rate  0.7 on training, but should be 1 for testing
keep_prob = tf.placeholder(tf.float32)

W1 = tf.Variable(tf.random_normal([30, 256]), name='weight1')
b1 = tf.Variable(tf.random_normal([256]), name='bias1')
layer1 = tf.sigmoid(tf.matmul(X, W1) + b1)
layer1 = tf.nn.dropout(layer1, keep_prob=keep_prob)

W2 = tf.Variable(tf.random_normal([256, 256]), name='weight2')
b2 = tf.Variable(tf.random_normal([256]), name='bias2')
layer2 = tf.sigmoid(tf.matmul(layer1, W2) + b2)
layer2 = tf.nn.dropout(layer2, keep_prob=keep_prob)

W3 = tf.Variable(tf.random_normal([256, 256]), name='weight3')
b3 = tf.Variable(tf.random_normal([256]), name='bias3')
layer3 = tf.sigmoid(tf.matmul(layer2, W3) + b3)
layer3 = tf.nn.dropout(layer3, keep_prob=keep_prob)

W4 = tf.Variable(tf.random_normal([256, 1]), name='weight4')
b4 = tf.Variable(tf.random_normal([1]), name='bias4')
hypothesis = tf.sigmoid(tf.matmul(layer3, W4) + b4)


# cost/loss function
cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) * tf.log(1 - hypothesis))

train = tf.train.GradientDescentOptimizer(learning_rate=0.001).minimize(cost)

# Accuracy computation
# True if hypothesis>0.5 else False
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

# Launch graph
with tf.Session() as sess:
    # Initialize TensorFlow variables
    sess.run(tf.global_variables_initializer())

    for step in range(10001):
        cost_val, _ = sess.run([cost, train], feed_dict={X: x_data_train, Y: y_data_train, keep_prob: 0.7})
        if step % 1000 == 0:
            print(step, cost_val)

    # Accuracy report
    a = sess.run(accuracy, feed_dict={X: x_data_train, Y: y_data_train, keep_prob: 1})
    print("Train Accuracy: ", a)
    a = sess.run(accuracy, feed_dict={X: x_data_test, Y: y_data_test, keep_prob: 1})
    print("Test Accuracy: ", a)

0 7.96391
1000 nan
2000 nan
3000 nan
4000 nan
5000 nan
6000 nan
7000 nan
8000 nan
9000 nan
10000 nan
Train Accuracy:  0.513333
Test Accuracy:  0.769231


## Adam

In [17]:
# placeholders for a tensor that will be always fed.
X = tf.placeholder(tf.float32, shape=[None, 30])
Y = tf.placeholder(tf.float32, shape=[None, 1])

# dropout (keep_prob) rate  0.7 on training, but should be 1 for testing
keep_prob = tf.placeholder(tf.float32)

W1 = tf.get_variable("a_weight1", shape=[30, 256],
                     initializer=tf.contrib.layers.xavier_initializer())
b1 = tf.Variable(tf.random_normal([256]), name='bias1')
layer1 = tf.nn.relu(tf.matmul(X, W1) + b1)
layer1 = tf.nn.dropout(layer1, keep_prob=keep_prob)

W2 = tf.get_variable("a_weight2", shape=[256, 256],
                     initializer=tf.contrib.layers.xavier_initializer())
b2 = tf.Variable(tf.random_normal([256]), name='bias2')
layer2 = tf.nn.relu(tf.matmul(layer1, W2) + b2)
layer2 = tf.nn.dropout(layer2, keep_prob=keep_prob)

W3 = tf.get_variable("a_weight3", shape=[256, 256],
                     initializer=tf.contrib.layers.xavier_initializer())
b3 = tf.Variable(tf.random_normal([256]), name='bias3')
layer3 = tf.nn.relu(tf.matmul(layer2, W3) + b3)
layer3 = tf.nn.dropout(layer3, keep_prob=keep_prob)

W4 = tf.get_variable("a_weight4", shape=[256, 1],
                     initializer=tf.contrib.layers.xavier_initializer())
b4 = tf.Variable(tf.random_normal([1]), name='bias4')
hypothesis = tf.sigmoid(tf.matmul(layer3, W4) + b4)


# cost/loss function
cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) * tf.log(1 - hypothesis))

train = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)

# Accuracy computation
# True if hypothesis>0.5 else False
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

# Launch graph
with tf.Session() as sess:
    # Initialize TensorFlow variables
    sess.run(tf.global_variables_initializer())

    for step in range(10001):
        cost_val, _ = sess.run([cost, train], feed_dict={X: x_data_train, Y: y_data_train, keep_prob: 0.7})
        if step % 1000 == 0:
            print(step, cost_val)

    # Accuracy report
    a = sess.run(accuracy, feed_dict={X: x_data_train, Y: y_data_train, keep_prob: 1})
    print("Train Accuracy: ", a)
    a = sess.run(accuracy, feed_dict={X: x_data_test, Y: y_data_test, keep_prob: 1})
    print("Test Accuracy: ", a)

0 0.94181
1000 nan
2000 nan
3000 nan
4000 nan
5000 nan
6000 nan
7000 nan
8000 nan
9000 nan
10000 nan
Train Accuracy:  0.513333
Test Accuracy:  0.769231
