# Logistic Regression with numpy
First attempt at using logistic regression to classify whether a patient has cancer or not.

In [27]:
# Numpy
import numpy as np

# Dataset
from sklearn.datasets import load_breast_cancer

# Plotting
import matplotlib.pyplot as plt
# Enable plotting inline
%matplotlib inline

## Prerequisites

In [28]:
# Load the breast cancer dataset shipped with scikit-learn; the returned
# object is indexed with "data" and "target" in the next cell.
data = load_breast_cancer()

In [29]:
# Transpose so that each row is a feature and each column is one sample
X_data = data["data"].T

# The targets arrive as a flat vector; store them as a single row so they
# can be sliced column-wise exactly like X_data
Y_data = data["target"]
Y_data = Y_data.reshape((1, len(Y_data)))

# The first 90% of the columns are used for training, the rest for testing
m_train = int(0.9 * X_data.shape[1])

# Column-wise split into training and test data
X_train_data, X_test_data = X_data[:, :m_train], X_data[:, m_train:]
Y_train_data, Y_test_data = Y_data[:, :m_train], Y_data[:, m_train:]

# Report the resulting shapes
for split_label, split in (
    ("X-Training-Data-Shape:", X_train_data),
    ("Y-Training-Data-Shape:", Y_train_data),
    ("X-Test-Data-Shape:", X_test_data),
    ("Y-Test-Data-Shape:", Y_test_data),
):
    print(split_label, split.shape)

X-Training-Data-Shape: (30, 512)
Y-Training-Data-Shape: (1, 512)
X-Test-Data-Shape: (30, 57)
Y-Test-Data-Shape: (1, 57)


## Helper functions
- Sigmoid function: __Calculates the sigmoid values for an array__
- Parameter initialization: __Initializes parameters with 0s, depending on dimension__

In [30]:
def sigmoid(X):
    """Apply the logistic function 1 / (1 + exp(-X)) element-wise.

    Parameters
    ----------
    X : scalar or numpy array of numbers
        Input value(s).

    Returns
    -------
    The sigmoid of every element of ``X`` (same shape as ``X``), each value
    lying in [0, 1].

    Notes
    -----
    The value is evaluated as ``exp(-log(1 + exp(-X)))`` using
    ``np.logaddexp``. This is mathematically the same expression as
    ``1 / (1 + exp(-X))`` but never has to represent ``exp(-X)`` itself, so
    large negative inputs no longer overflow inside ``np.exp``.
    """
    # logaddexp(0, -X) == log(exp(0) + exp(-X)) == log(1 + exp(-X))
    s = np.exp(-np.logaddexp(0, -X))
    return s

# Sanity check: compare against known values rounded to two decimals
t = np.array([[1, 2], [3, 4]])
s_t = sigmoid(t)
assert np.array_equal(np.round(s_t, decimals=2), np.array([[0.73, 0.88], [0.95, 0.98]]))

In [31]:
def init_param(dim):
    """Create zero-valued starting parameters for the model.

    Parameters
    ----------
    dim : int
        Number of weights to create.

    Returns
    -------
    (w, b) where ``w`` is a ``(dim, 1)`` array of zeros and ``b`` is the
    integer ``0``.
    """
    return np.zeros((dim, 1)), 0

# Sanity check: three weights, all zero, and a zero bias
w, b = init_param(3)
assert b == 0
assert np.array_equal(w, np.zeros((3, 1)))

## Costs and Gradient calculation and Optimization
- propagate: __Calculates the cost and the gradients of propagating X with w,b__
- optimize: __Optimizes the w and b parameters stepwise with the gradients for a fixed number of iterations__
- predict: __Predicts an output__

In [32]:
def propagate(X,Y,w,b):
    """Compute the logistic-regression cost and its gradients for one pass.

    Parameters
    ----------
    X : numpy array, one column per sample (``X.shape[1]`` samples)
    Y : numpy array of labels, broadcastable against the ``(1, m)`` activations
    w : numpy array of weights; ``w.T`` is multiplied with ``X``
    b : bias added to every linear output

    Returns
    -------
    cost : 0-d numpy array holding the mean cross-entropy over the samples
    dw   : gradient of the cost with respect to ``w`` (same shape as ``w``)
    db   : gradient of the cost with respect to ``b``
    """

    m = X.shape[1]

    Z = np.dot(w.T,X)+b
    A = sigmoid(Z)

    # Cross-entropy written in terms of the linear output Z:
    #   -(y*log(A) + (1-y)*log(1-A)) == log(1 + exp(Z)) - y*Z
    # This form never evaluates log(0), so the cost stays finite even when
    # the activation rounds to exactly 0 or 1.
    cost = (1/m)*np.sum(np.logaddexp(0, Z) - Y*Z)

    dz = A-Y
    dw = (1/m)*np.dot(X,dz.T)
    db = (1/m)*np.sum(dz)

    assert(dw.shape == w.shape)
    assert(db.dtype == float)
    cost = np.squeeze(cost)
    assert(cost.shape == ())

    return cost, dw, db
    
# Use the method
# w,b = init_param(X_train_data.shape[0])
# cost, dw, db = propagate(X_train_data,Y_train_data,w,b)

In [48]:
def optimize(w, b, X, Y, num_iterations, learning_rate, print_cost = False):
    """Run plain gradient descent on ``w`` and ``b``.

    Parameters
    ----------
    w, b : starting weights and bias
    X, Y : data and labels, passed unchanged to ``propagate``
    num_iterations : number of update steps to perform
    learning_rate : factor applied to each gradient before subtracting it
    print_cost : when ``True``, print the cost on every 1000th step

    Returns
    -------
    (w, b) after the final update step.
    """
    step = 0
    while step < num_iterations:
        cost, grad_w, grad_b = propagate(X, Y, w, b)

        if print_cost == True and step % 1000 == 0:
            print("Iteration:", step, "Cost:", cost)

        # Rebind instead of updating in place so the arrays handed in by
        # the caller are never modified
        w = w - learning_rate * grad_w
        b = b - learning_rate * grad_b
        step += 1
    return w, b

# Train on the training split, starting from zero-initialised parameters
w, b = init_param(X_train_data.shape[0])
w, b = optimize(
    w, b, X_train_data, Y_train_data,
    num_iterations=300000, learning_rate=0.1, print_cost=False,
)

  
  
  


In [49]:
def predict(w, b, X):
    """Predict a 0/1 label for every column of ``X``.

    Parameters
    ----------
    w : weights, reshaped internally to ``(X.shape[0], 1)``
    b : bias
    X : numpy array with one column per sample

    Returns
    -------
    A ``(1, m)`` float array (``m = X.shape[1]``) holding 1 where the
    sigmoid activation is strictly greater than 0.5 and 0 otherwise.
    """
    m = X.shape[1]
    weights = w.reshape(X.shape[0], 1)
    A = sigmoid(np.dot(weights.T, X) + b)

    # Start from all zeros; only the activations above the threshold flip to 1
    Y_prediction = np.zeros((1, m))
    Y_prediction[A > 0.5] = 1

    assert Y_prediction.shape == (1, m)
    return Y_prediction

def performance(w,b,X,Y):
    """Return one minus the mean absolute difference between ``Y`` and the
    predictions for ``X``.

    When ``Y`` holds 0/1 labels this is the fraction of samples that were
    classified correctly (the accuracy), not an error rate.
    """
    predictions = predict(w, b, X)
    m = Y.shape[1]
    mismatch_rate = (1 / m) * np.sum(np.abs(Y - predictions))
    return 1 - mismatch_rate

In [50]:
# Score the trained parameters on the training split (1 - mean |Y - prediction|).
# NOTE(review): the held-out split (X_test_data / Y_test_data) is not scored
# in the visible cells — consider evaluating it as well.
performance(w,b,X_train_data,Y_train_data)

  


0.94140625