In [1]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

def sigmoid(Z):
    return 1 / (1+np.exp(-Z))    

In [2]:
dataset = load_breast_cancer()
X = dataset.data
Y = dataset.target
nx = np.shape(dataset.data)[1]
m = len(dataset.target)
trainX, testX, trainY, testY = train_test_split(X, Y, test_size=0.2)
trainX = trainX.T
testX = testX.T
trainY = trainY.reshape((1, len(trainY)))
testY = testY.reshape((1, len(testY)))

In [3]:
print("Shape of trainX:", np.shape(trainX))
print("Shape of testX:", np.shape(testX))
print("Shape of trainY:", np.shape(trainY))
print("Shape of testY:", np.shape(testY))
print("Number of features:", nx)

Shape of trainX: (30, 455)
Shape of testX: (30, 114)
Shape of trainY: (1, 455)
Shape of testY: (1, 114)
Number of features: 30


In [4]:
#Hyperparameters
nepochs = 10
alpha = 0.001

#Initialize weights and intercept
w = np.zeros((nx,1))
b = np.zeros((1,1))
print("Shape of w:", np.shape(w))
print("Shape of b:", np.shape(b))

#Standardize features
mu = np.mean(trainX, axis=1)
sd = np.std(trainX, axis=1)
mu = mu.reshape((len(mu), 1))
sd = sd.reshape((len(sd), 1))
trainX = (trainX - mu) / sd
testX = (testX - mu) / sd

#logistic regression
for step in range(nepochs): 
    #Forward propagation
    Z = np.dot(w.T, trainX) + b
    A = sigmoid(Z)
    
    #Backward propagation
    dZ = A - trainY
    dw = 1/m * np.dot(trainX, dZ.T)
    db = 1/m * np.sum(dZ)
    
    #Update weights and intercept
    w = w - alpha * dw
    b = b - alpha * db

Shape of w: (30, 1)
Shape of b: (1, 1)


In [5]:
Z = np.dot(w.T, testX) + b
pred = sigmoid(Z)
positive_index = pred>0.5
negative_index = pred<=0.5
pred[positive_index] = 1
pred[negative_index] = 0
pred = pred.astype(int)
print("Confusion matrix:\n", confusion_matrix(pred[0], testY[0]))
acc = (confusion_matrix(pred[0], testY[0])[0,0] + confusion_matrix(pred[0], testY[0])[1,1]) / len(pred[0])
print("Accuracy:", acc)

Confusion matrix:
 [[32  6]
 [ 4 72]]
Accuracy: 0.9122807017543859
