In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer

In [2]:
def initialiseNetwork(num_features):
    W = np.zeros((num_features, 1))
    b = 0
    parameters = {"W": W, "b": b}
    return parameters

In [3]:
def sigmoid(z):
    return 1/(1 + np.exp(-z))

In [4]:
def forwardPropagation(X, Y, parameters):
    W = parameters["W"]
    b = parameters["b"]
    Z = np.dot(W.T,X) + b
    A = sigmoid(Z)
    return A

In [5]:
def cost(A, Y, num_samples):
    return -1/num_samples *np.sum(Y*np.log(A) + (1-Y)*(np.log(1-A)))

In [6]:
def backPropagration(X, Y, A, num_samples):
    dZ = A - Y
    dW = (np.dot(X,dZ.T))/num_samples
    db = np.sum(dZ)/num_samples
    return dW, db

In [21]:
def updateParameters(parameters, dW, db, learning_rate):
    W = parameters["W"] - (learning_rate * dW)
    b = parameters["b"] - (learning_rate * db)
    return {"W": W, "b": b}


In [7]:
def model(X, Y, num_iter, learning_rate):
    num_features = X.shape[0]
    num_samples = float(X.shape[1])
    parameters = initialiseNetwork(num_features)
    for i in range(num_iter):
        A = forwardPropagation(X, Y, parameters)
        if(i%100 == 0):
            print("cost after {} iteration: {}".format(i, cost(A, Y, num_samples)))
        dW, db = backPropagration(X, Y, A, num_samples)
        parameters = updateParameters(parameters, dW, db, learning_rate)
    return parameters


In [8]:
def predict(W, b, X):
    Z = np.dot(W.T,X) + b
    Y = np.array([1 if y > 0.5 else 0 for y in sigmoid(Z[0])]).reshape(1,len(Z[0]))
    return Y

In [9]:
(X_cancer, y_cancer) = load_breast_cancer(return_X_y = True)

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X_cancer, y_cancer,
                                                   random_state = 25)

In [11]:
def normalize(data):
    col_max = np.max(data, axis = 0)
    col_min = np.min(data, axis = 0)
    return np.divide(data - col_min, col_max - col_min)

In [13]:
X_train_n = normalize(X_train)
X_test_n = normalize(X_test)
print(X_train_n.shape)
print(X_test_n.shape)

(426, 30)
(143, 30)


In [14]:
X_trainT = X_train_n.T
X_testT = X_test_n.T
print(X_trainT.shape)
print(X_testT.shape)

(30, 426)
(30, 143)


In [17]:
print(y_train.shape)
print(y_test.shape)

(426,)
(143,)


In [19]:
y_trainT = y_train.reshape(1,426)
y_testT = y_test.reshape(1,143)
print(y_trainT.shape)
print(y_testT.shape)

(1, 426)
(1, 143)


In [22]:
parameters = model(X_trainT, y_trainT, 4000, 0.75)

cost after 0 iteration: 0.6931471805599453
cost after 100 iteration: 0.24382767353051088
cost after 200 iteration: 0.18414919195134818
cost after 300 iteration: 0.1565873493485997
cost after 400 iteration: 0.1396752246321806
cost after 500 iteration: 0.1278729526958286
cost after 600 iteration: 0.1190088775113677
cost after 700 iteration: 0.11202667072700777
cost after 800 iteration: 0.10633924623930972
cost after 900 iteration: 0.10158933661241841
cost after 1000 iteration: 0.09754476494426205
cost after 1100 iteration: 0.0940469433647547
cost after 1200 iteration: 0.09098323338346233
cost after 1300 iteration: 0.08827107206470108
cost after 1400 iteration: 0.08584834873491791
cost after 1500 iteration: 0.08366730760137953
cost after 1600 iteration: 0.08169053991796828
cost after 1700 iteration: 0.07988826663984762
cost after 1800 iteration: 0.0782364464730404
cost after 1900 iteration: 0.07671542796224083
cost after 2000 iteration: 0.07530896965280097
cost after 2100 iteration: 0.074

In [23]:
print(parameters)

{'W': array([[-1.58455606],
       [-2.40924914],
       [-1.66149449],
       [-2.67137327],
       [ 0.7663745 ],
       [ 0.13736327],
       [-4.60035536],
       [-5.75816403],
       [-0.42539435],
       [ 3.91327216],
       [-3.26972938],
       [ 1.36829576],
       [-2.06525156],
       [-2.41978688],
       [-1.3430973 ],
       [ 2.70019395],
       [ 0.55199124],
       [ 1.17146685],
       [ 0.51217921],
       [ 1.88542175],
       [-5.18142462],
       [-5.40012744],
       [-4.48583887],
       [-4.59617795],
       [-3.34899524],
       [-1.20950225],
       [-3.67728035],
       [-5.02114259],
       [-3.11319132],
       [-0.20109589]]), 'b': 14.520237920116649}


In [26]:
yPredTrain = predict(parameters['W'],parameters['b'],X_trainT)   # pass weigths and bias from parameters dictionary and X_trainT as input to the function
yPredTest = predict(parameters['W'],parameters['b'],X_testT)    # pass the same parameters but X_testT as input data

In [27]:
accuracy_train = 100 - np.mean(np.abs(yPredTrain - y_trainT)) * 100
accuracy_test = 100 - np.mean(np.abs(yPredTest - y_testT)) * 100
print("train accuracy: {} %".format(accuracy_train))
print("test accuracy: {} %".format(accuracy_test))
with open("Output.txt", "w") as text_file:
    text_file.write("train= %f\n" % accuracy_train)
    text_file.write("test= %f" % accuracy_test)

train accuracy: 98.59154929577464 %
test accuracy: 93.00699300699301 %
