In [12]:
# pandas for handling data
import pandas as pd
# pandas for numeric opertions
import numpy as np
# matplotlib for visualization
import matplotlib.pyplot as plt
import seaborn as sns 
# tensorflow our machine learning library
import tensorflow as tf
# train test split for spliting our data
from sklearn.model_selection import train_test_split
# one hot encoding for one hot encoding
from sklearn.preprocessing import OneHotEncoder

In [13]:
# names of columns in our dataset
COLUMNS = ['Sample_code_number', 'Clump_Thickness','Uniformity_of_Cell_Size','Uniformity_of_Cell_Shape',
           'Marginal_Adhesion','Single_Epithelial_Cell_Size','Bare_Nuclei','Bland_Chromatin','Normal_Nucleoli',
           'Mitoses','Class']
# read and store our data
df = pd.read_csv('breast-cancer-wisconsin.data.txt', names=COLUMNS)
# print some item of dataframe
df.head()

Unnamed: 0,Sample_code_number,Clump_Thickness,Uniformity_of_Cell_Size,Uniformity_of_Cell_Shape,Marginal_Adhesion,Single_Epithelial_Cell_Size,Bare_Nuclei,Bland_Chromatin,Normal_Nucleoli,Mitoses,Class
0,1000025,5,1,1,1,2,1,3,1,1,2
1,1002945,5,4,4,5,7,10,3,2,1,2
2,1015425,3,1,1,1,2,2,3,1,1,2
3,1016277,6,8,8,1,3,4,3,7,1,2
4,1017023,4,1,1,3,2,1,3,1,1,2


In [14]:
df.drop('Sample_code_number', axis=1, inplace=True)
# let's see our data
df.head()

Unnamed: 0,Clump_Thickness,Uniformity_of_Cell_Size,Uniformity_of_Cell_Shape,Marginal_Adhesion,Single_Epithelial_Cell_Size,Bare_Nuclei,Bland_Chromatin,Normal_Nucleoli,Mitoses,Class
0,5,1,1,1,2,1,3,1,1,2
1,5,4,4,5,7,10,3,2,1,2
2,3,1,1,1,2,2,3,1,1,2
3,6,8,8,1,3,4,3,7,1,2
4,4,1,1,3,2,1,3,1,1,2


In [15]:
df['Bare_Nuclei'].replace('?', 0, inplace=True)
# then we get the mean of the column
mean = np.asarray(df['Bare_Nuclei'], dtype=np.float).mean()
# fill empty values with mean value
df['Bare_Nuclei'].replace([0], mean, inplace=True)
# let's again check there is any empty value in our dataset
df.isin([0]).any()

Clump_Thickness                False
Uniformity_of_Cell_Size        False
Uniformity_of_Cell_Shape       False
Marginal_Adhesion              False
Single_Epithelial_Cell_Size    False
Bare_Nuclei                    False
Bland_Chromatin                False
Normal_Nucleoli                False
Mitoses                        False
Class                          False
dtype: bool

In [16]:
X_df = df.drop('Class',axis=1)
y_df = df.loc[:, 'Class']
y_df = y_df.reshape(-1, 1)
print("Shape of X before: ", X_df.shape)
print("Shape of y before: ", y_df.shape)

oneHotEncode = OneHotEncoder()
oneHotEncode.fit(X_df)
X_df = oneHotEncode.transform(X_df).toarray()

oneHotEncode.fit(y_df)
y_df = oneHotEncode.transform(y_df).toarray()

print("Shape of X: ", X_df.shape)
print("Shape of y: ", y_df.shape)


Shape of X before:  (699, 9)
Shape of y before:  (699, 1)


  This is separate from the ipykernel package so we can avoid doing imports until


In [17]:
X_train, X_test, y_train, y_test = train_test_split(X_df, y_df, test_size=0.2, random_state=True)

In [18]:
learning_rate = 0.1 # learning rate of optimizer
num_epochs = 200
display_step = 10

# network parametres
n_hidden_1 = 300 # number of nodes in hidden layer 1
n_hidden_2 = 300 # number of nodes in hidden layer 2
n_inputs = 9 # number of inputs 
n_classes = 1 # number of outputs

X = tf.placeholder(tf.float32, [None, n_inputs]) # X is our features placeholder. We'll feed data during training time.
y = tf.placeholder(tf.float32, [None, n_classes]) # y is our label placeholder. We'll feed data during training time.

weights = {
    'hidden1' : tf.Variable(tf.random_normal([n_inputs, n_hidden_1])), # weights of hidden layer 1
    'hidden2' : tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])), # weights of hidden layer 2
    'output' : tf.Variable(tf.random_normal([n_hidden_2, n_classes])) # weights of outpur layer
}

biases = {
    'hidden1' : tf.Variable(tf.random_normal([n_hidden_1])), # biases of hidden layer 1
    'hidden2' : tf.Variable(tf.random_normal([n_hidden_2])), # biases of hidden layer 2
    'output' : tf.Variable(tf.random_normal([n_classes])) # biases of output layer
}


In [19]:
layer_1_prediction = tf.add(tf.matmul(X, weights['hidden1']), biases['hidden1']) # layer 1 predicton: y = mx + b
layer_2_prediction = tf.add(tf.matmul(layer_1_prediction, weights['hidden2']), biases['hidden2']) # layer 2 prediction
prediction = tf.add(tf.matmul(layer_2_prediction, weights['output']), biases['output']) # final/ output layer prediction

In [20]:
# let's calculate the cost using softmax cross entropy with logits
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=prediction)) 
# we'll use Adapoptimizer for our optimizer
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

# calculate the correct prediction
correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()

In [21]:
with tf.Session() as sess:
    # Run the initializer
    sess.run(init)

    for step in range(num_epochs):
        sess.run(optimizer, feed_dict={X: X_train, y: y_train}) # we feed the real data into placeholder
        if step % display_step == 0 or step == 1:
            # Calculate batch loss and accuracy
            loss, acc = sess.run([cost, accuracy], feed_dict={X: X_train,
                                                                 y: y_train})
            # print debugging information
            print("Step " + str(step) + ", Loss= " + \
                  "{:.4f}".format(loss) + ", Training Accuracy= " + \
                  "{:.3f}".format(acc))

    print("Optimization Finished!")

    # Calculate accuracy for MNIST test images
    print("Testing Accuracy:", \
        sess.run(accuracy, feed_dict={X: X_test,
                                      y: y_test}))

Step 0, Loss= 0.0000, Training Accuracy= 1.000
Step 1, Loss= 0.0000, Training Accuracy= 1.000
Step 10, Loss= 0.0000, Training Accuracy= 1.000
Step 20, Loss= 0.0000, Training Accuracy= 1.000
Step 30, Loss= 0.0000, Training Accuracy= 1.000
Step 40, Loss= 0.0000, Training Accuracy= 1.000
Step 50, Loss= 0.0000, Training Accuracy= 1.000
Step 60, Loss= 0.0000, Training Accuracy= 1.000
Step 70, Loss= 0.0000, Training Accuracy= 1.000
Step 80, Loss= 0.0000, Training Accuracy= 1.000
Step 90, Loss= 0.0000, Training Accuracy= 1.000
Step 100, Loss= 0.0000, Training Accuracy= 1.000
Step 110, Loss= 0.0000, Training Accuracy= 1.000
Step 120, Loss= 0.0000, Training Accuracy= 1.000
Step 130, Loss= 0.0000, Training Accuracy= 1.000
Step 140, Loss= 0.0000, Training Accuracy= 1.000
Step 150, Loss= 0.0000, Training Accuracy= 1.000
Step 160, Loss= 0.0000, Training Accuracy= 1.000
Step 170, Loss= 0.0000, Training Accuracy= 1.000
Step 180, Loss= 0.0000, Training Accuracy= 1.000
Step 190, Loss= 0.0000, Training 