In [1]:
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
import tensorflow as tf
import numpy as np
import codecs
import csv

In [2]:
# Load every data row of the CSV into a 2-D NumPy array of strings.
# The csv module expects a text-mode file opened with newline="" so that it
# can handle line endings (including newlines inside quoted fields) itself.
with open("BreastCancer.csv", "r", encoding="utf-8", newline="") as f:
    reader = csv.reader(f)
    next(reader, None)  # skip the header row; tolerate an empty file
    dataSet = np.array(list(reader))

In [3]:
# Integer class id for each label letter found in the label column.
targets = dict(B=0, M=1)

In [4]:
# Feature matrix: every column from index 2 onward, parsed as float32.
X = dataSet[:, 2:].astype(np.float32)

# Label vector: translate the letters in column 1 ("B"/"M") to integer class
# ids through `targets`.
# A fresh integer array is built on purpose: `dataSet` holds strings, so
# assigning 0/1 into a slice of it would store the strings "0"/"1" (and would
# also overwrite `dataSet` itself, because the slice is a view).
# An unexpected label raises KeyError instead of slipping through unnoticed.
Y = np.array([targets[label] for label in dataSet[:, 1]], dtype=np.int32)

In [5]:
# Hold out 25% of the samples for evaluation; the fixed random_state keeps the
# split identical from run to run.
trainX, testX, trainY, testY = train_test_split(
    X,
    Y,
    test_size=0.25,
    random_state=1234,
)

In [6]:
# Build and train a one-hidden-layer softmax classifier (TensorFlow 1.x graph
# API), log the cost to TensorBoard for the train and test sets, then report
# test accuracy and a per-class classification report.

# Start from an empty default graph. Without this, re-running the cell keeps
# adding ops to the same graph, and tf.summary.merge_all() then also collects
# summary ops from earlier runs whose placeholders are never fed.
tf.reset_default_graph()
# Seed the weight initialisers so runs are repeatable (the train/test split
# above is already seeded with the same value).
tf.set_random_seed(1234)

n_features = trainX.shape[1]
n_hidden = 100
n_classes = 2

# NOTE: from here on X and Y name the graph placeholders, shadowing the NumPy
# arrays of the same name that were used to create the train/test split.
X = tf.placeholder(tf.float32, [None, n_features], name="X")
Y = tf.placeholder(tf.float32, [None, n_classes], name="Y")

# Hidden layer: affine transform followed by ReLU.
with tf.name_scope("input"):
    W1 = tf.Variable(tf.random_uniform([n_features, n_hidden], -1., 1.), name="W1")
    b1 = tf.Variable(tf.zeros([n_hidden]), name="b1")
    L1 = tf.nn.relu(tf.add(tf.matmul(X, W1), b1))

# Output layer: raw logits, one per class.
with tf.name_scope("output"):
    W2 = tf.Variable(tf.random_uniform([n_hidden, n_classes], -1., 1.), name="W2")
    b2 = tf.Variable(tf.zeros([n_classes]), name="b2")
    L2 = tf.add(tf.matmul(L1, W2), b2)

# Mean softmax cross-entropy, minimised with plain gradient descent.
with tf.name_scope("optimizer"):
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=L2, labels=Y))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-3).minimize(cost)
    tf.summary.scalar("cost", cost)

prediction = tf.argmax(tf.nn.softmax(L2), axis=1)
target = tf.argmax(Y, axis=1)

is_correct = tf.equal(prediction, target)
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

# Create the merged summary op while the graph is still being built.
merged = tf.summary.merge_all()

# One-hot encode the labels with NumPy instead of building and running
# throw-away TF ops. The integer cast also accepts labels stored as the
# strings "0"/"1".
_identity = np.eye(n_classes, dtype=np.float32)
one_hot_trainY = _identity[np.asarray(trainY).astype(np.int64)]
one_hot_testY = _identity[np.asarray(testY).astype(np.int64)]

train_feed = {X: trainX, Y: one_hot_trainY}
test_feed = {X: testX, Y: one_hot_testY}

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    writer = tf.summary.FileWriter("./logs/train", sess.graph)
    val_writer = tf.summary.FileWriter("./logs/validation", sess.graph)

    try:
        for i in range(101):
            # One full-batch gradient step.
            sess.run(optimizer, feed_dict=train_feed)

            if i % 10 == 0:
                # Fetch both metrics in a single run call.
                c, acc = sess.run([cost, accuracy], feed_dict=train_feed)
                print(i, c, "acc", acc)

            summary = sess.run(merged, feed_dict=train_feed)
            writer.add_summary(summary, i)
            writer.flush()  # flush each step so TensorBoard can follow live

            summary = sess.run(merged, feed_dict=test_feed)
            val_writer.add_summary(summary, i)
            val_writer.flush()
    finally:
        # Release the event-file handles even if training is interrupted.
        writer.close()
        val_writer.close()

    print("test acc:", sess.run(accuracy, feed_dict=test_feed))
    p, t = sess.run([prediction, target], feed_dict=test_feed)
    print(classification_report(t, p))

0 19453.2 acc 0.631455
100 0.279639 acc 0.938967
200 0.246942 acc 0.938967
300 0.173875 acc 0.943662
400 0.15475 acc 0.943662
500 0.150216 acc 0.941315
600 0.143573 acc 0.943662
700 0.140525 acc 0.943662
800 0.138299 acc 0.943662
900 0.13632 acc 0.943662
1000 0.135648 acc 0.943662
test acc: 0.93007
             precision    recall  f1-score   support

          0       0.91      0.98      0.95        88
          1       0.96      0.85      0.90        55

avg / total       0.93      0.93      0.93       143

