In [198]:
import numpy as np
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.python.framework import ops
from sklearn.preprocessing import Imputer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

In [199]:
columns=['Sample_code_number', 'Clump_Thickness','Uniformity_of_Cell_Size','Uniformity_of_Cell_Shape',
           'Marginal_Adhesion','Single_Epithelial_Cell_Size','Bare_Nuclei','Bland_Chromatin','Normal_Nucleoli',
           'Mitoses','Class']
data=pd.read_csv("breast-cancer-wisconsin.data.txt",names=columns)
#data=data.drop('Sample_code_number',axis=1)

## Correlation matrix

In [200]:
corr_matrix=data.corr()
corr_matrix['Class'].sort_values()


Sample_code_number            -0.080226
Mitoses                        0.423170
Single_Epithelial_Cell_Size    0.682785
Marginal_Adhesion              0.696800
Normal_Nucleoli                0.712244
Clump_Thickness                0.716001
Bland_Chromatin                0.756616
Uniformity_of_Cell_Size        0.817904
Uniformity_of_Cell_Shape       0.818934
Class                          1.000000
Name: Class, dtype: float64

### Data Imputation

In [201]:
data=data.drop('Sample_code_number',axis=1)
data['Bare_Nuclei'].replace('?',0, inplace=True)
imputer=Imputer(strategy="median")
col=data.columns
data=imputer.fit_transform(data)
data=pd.DataFrame(data,columns=col)


X_df = data.iloc[:, :9]
y_df = data.loc[:, 'Class']
y_df = y_df.reshape(-1, 1)
print("Shape of X before: ", X_df.shape)
print("Shape of y before: ", y_df.shape)

oneHotEncode = OneHotEncoder()
oneHotEncode.fit(X_df)
X_df = oneHotEncode.transform(X_df).toarray()

oneHotEncode.fit(y_df)
y_df = oneHotEncode.transform(y_df).toarray()

print("Shape of X: ", X_df.shape)
print("Shape of y: ", y_df.shape)



Shape of X before:  (699, 9)
Shape of y before:  (699, 1)
Shape of X:  (699, 90)
Shape of y:  (699, 2)


  # This is added back by InteractiveShellApp.init_path()


Unnamed: 0,Clump_Thickness,Uniformity_of_Cell_Size,Uniformity_of_Cell_Shape,Marginal_Adhesion,Single_Epithelial_Cell_Size,Bare_Nuclei,Bland_Chromatin,Normal_Nucleoli,Mitoses,Class
0,5.0,1.0,1.0,1.0,2.0,1.0,3.0,1.0,1.0,2.0
1,5.0,4.0,4.0,5.0,7.0,10.0,3.0,2.0,1.0,2.0
2,3.0,1.0,1.0,1.0,2.0,2.0,3.0,1.0,1.0,2.0
3,6.0,8.0,8.0,1.0,3.0,4.0,3.0,7.0,1.0,2.0
4,4.0,1.0,1.0,3.0,2.0,1.0,3.0,1.0,1.0,2.0
5,8.0,10.0,10.0,8.0,7.0,10.0,9.0,7.0,1.0,4.0
6,1.0,1.0,1.0,1.0,2.0,10.0,3.0,1.0,1.0,2.0
7,2.0,1.0,2.0,1.0,2.0,1.0,3.0,1.0,1.0,2.0
8,2.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,5.0,2.0
9,4.0,2.0,1.0,1.0,2.0,1.0,2.0,1.0,1.0,2.0


## Creating test set

In [190]:
train_data,test_data,train_set_label,test_set_label=train_test_split(X_df,y_df,train_size=0.8,random_state=30)




train_set_label=train_data['Class']
train_data=train_data.drop('Class',axis=1)
test_set_label=test_data['Class']
test_data=test_data.drop('Class',axis=1)
train_set_label=train_set_label.reshape(-1,1)
test_set_label=test_set_label.reshape(-1,1)
oneHotEncode = OneHotEncoder()
oneHotEncode.fit(train_data)
train_data = oneHotEncode.transform(train_data).toarray()

oneHotEncode.fit(train_set_label)
train_set_label = oneHotEncode.transform(train_set_label).toarray()

oneHotEncode.fit(test_data)
test_data = oneHotEncode.transform(test_data).toarray()

oneHotEncode.fit(test_set_label)
test_set_label = oneHotEncode.transform(test_set_label).toarray()

print("Shape of X: ", train_data.shape)
print("Shape of y: ", test_data.shape)


### Create placeholders

In [191]:
def create_placeholder(n_x,n_y):
    X=tf.placeholder(tf.float32,[None,n_x],name="X")
    Y=tf.placeholder(tf.float32,[None,n_y],name="Y")
    return X,Y

### Initialize parameters

In [202]:
def initialize():
    W1 = tf.get_variable("W1",[90,10],initializer=tf.contrib.layers.xavier_initializer(seed = 1))
    #W1=tf.Variable(tf.random_normal([90, 10]))
    #b1 = tf.get_variable("b1",[1,10],initializer=tf.contrib.layers.xavier_initializer(seed = 1)
    b1=tf.Variable(tf.random_normal([10]))
    #W2=  tf.get_variable("W2",[10,5],initializer=tf.contrib.layers.xavier_initializer(seed = 1))
    W2=tf.Variable(tf.random_normal([10, 5]))
    #b2 = tf.get_variable("b2",[1,5],initializer=tf.contrib.layers.xavier_initializer(seed = 1))
    b2=tf.Variable(tf.random_normal([5]))
    #W3 = tf.get_variable("W3",[5,2],initializer=tf.contrib.layers.xavier_initializer(seed = 1))
    W3=tf.Variable(tf.random_normal([5, 2]))                                                                                  
    #b3 =tf.get_variable("b3",[1,2],initializer=tf.contrib.layers.xavier_initializer(seed = 1))
    b3=tf.Variable(tf.random_normal([2]))
    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2,
                  "W3": W3,
                  "b3": b3}
    return parameters

### Forward Prop

In [203]:
def forward_prop(X,parameters):
    
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    W3 = parameters['W3']
    b3 = parameters['b3']
    Z1 = tf.add(tf.matmul(X,W1),b1)
    A1 = tf.nn.relu(Z1)
    Z2 = tf.add(tf.matmul(Z1,W2),b2)
    A2 = tf.nn.relu(Z2)
    Z3 = tf.add(tf.matmul(Z2,W3),b3)
    return Z3
    
    

#### Cost function

In [204]:
def compute_cost(Z3,Y):
    cost=tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=Z3,labels= Y))
    print(cost)
    return cost

### Building final model

In [205]:
def model(train_data, train_set_label, test_data, test_set_label):
    
    learning_rate = 0.001
    iters = 150
   
    display_step = 10
    print_cost = True
    ops.reset_default_graph()                                                  # to be able to rerun the model without overwriting tf variables
    tf.set_random_seed(1)                             # to keep consistent results
    seed = 3                                          # to keep consistent results
    n_x=90                         # (n_x: input size, m : number of examples in the train set)
    n_y = 2                            # n_y : output size
       
    X,Y=create_placeholder(n_x,n_y)
    parameters=initialize()
    Z3=forward_prop(X,parameters)
    cost=compute_cost(Z3,Y)
    optimizer=tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
    correct_pred = tf.equal(tf.argmax(Z3,1), tf.argmax(Y,1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
    # Run the initializer
        sess.run(init)
       

        for step in range(iters):
            sess.run(optimizer, feed_dict={X: train_data, Y: train_set_label}) # we feed the real data into placeholder
            if step % display_step == 0 or step == 1:
                # Calculate batch loss and accuracy
                loss, acc = sess.run([cost, accuracy], feed_dict={X: train_data, Y: train_set_label})
                # print debugging information
                print("Step " + str(step) + ", Loss= " + \
                      "{:.4f}".format(loss) + ", Training Accuracy= " + \
                      "{:.3f}".format(acc))
                

        print("Optimization Finished!")

        # Calculate accuracy for MNIST test images
        print("Testing Accuracy:", \
            sess.run([accuracy,cost], feed_dict={X: test_data, Y: test_set_label}))

       
                                                                 
        
   

In [206]:
parameters=model(train_data, train_set_label, test_data, test_set_label)

Tensor("Mean:0", shape=(), dtype=float32)
Step 0, Loss= 1.2551, Training Accuracy= 0.662
Step 1, Loss= 1.2031, Training Accuracy= 0.662
Step 10, Loss= 0.7809, Training Accuracy= 0.680
Step 20, Loss= 0.4564, Training Accuracy= 0.750
Step 30, Loss= 0.2862, Training Accuracy= 0.862
Step 40, Loss= 0.2039, Training Accuracy= 0.927
Step 50, Loss= 0.1619, Training Accuracy= 0.941
Step 60, Loss= 0.1390, Training Accuracy= 0.957
Step 70, Loss= 0.1252, Training Accuracy= 0.961
Step 80, Loss= 0.1156, Training Accuracy= 0.964
Step 90, Loss= 0.1081, Training Accuracy= 0.964
Step 100, Loss= 0.1019, Training Accuracy= 0.966
Step 110, Loss= 0.0966, Training Accuracy= 0.968
Step 120, Loss= 0.0920, Training Accuracy= 0.968
Step 130, Loss= 0.0878, Training Accuracy= 0.971
Step 140, Loss= 0.0841, Training Accuracy= 0.973
Optimization Finished!
Testing Accuracy: [0.97857141, 0.073713012]
