In [5]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import MinMaxScaler

In [6]:
# Implement the sigmoid function from scratch
def sigmoid_f(z):
    """Logistic sigmoid ``1 / (1 + exp(-z))``, evaluated without overflow.

    Parameters
    ----------
    z : scalar or numpy.ndarray
        Input value(s); the function is applied element-wise.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Sigmoid of ``z``, with values in the closed interval [0, 1].
    """
    # sigmoid(z) = exp(-log(1 + exp(-z))). np.logaddexp(0, -z) evaluates
    # log(exp(0) + exp(-z)) without forming exp(-z) directly, so very negative
    # inputs no longer overflow (the naive form raises a RuntimeWarning there).
    return np.exp(-np.logaddexp(0, -z))

# Implement the hypothesis function from scratch
def classifier_f(x, w, b):
    """Logistic-regression hypothesis: the sigmoid of the affine score.

    Evaluates ``np.dot(x, w) + b`` and passes the result through
    ``sigmoid_f``, returning whatever ``sigmoid_f`` returns.
    """
    return sigmoid_f(np.dot(x, w) + b)

# Implement the entropy function as your cost function
def binary_loss_f(y_true, y_prob, m):
    """Binary cross-entropy between targets and predicted probabilities.

    Parameters
    ----------
    y_true : numpy.ndarray
        Target values, broadcastable against ``y_prob``.
    y_prob : array-like
        Predicted probabilities.
    m : int
        Divisor applied to the summed loss (the caller in this file passes
        the number of samples).

    Returns
    -------
    numpy.float64 or numpy.ndarray
        ``-sum(y*log(p) + (1-y)*log(1-p)) / m``, summed over the first axis.
    """
    # Keep probabilities strictly inside (0, 1): a saturated prediction of
    # exactly 0 or 1 would otherwise produce log(0) = -inf and 0 * -inf = nan.
    eps = np.finfo(float).eps
    p = np.clip(np.asarray(y_prob, dtype=float), eps, 1 - eps)
    per_sample = y_true * np.log(p) + (1 - y_true) * np.log(1 - p)
    # axis=0 reduces over the same axis the builtin sum() iterated over,
    # but in vectorised form rather than a Python-level loop.
    return -np.sum(per_sample, axis=0) / m

# Implement gradient descent for logistic regression
def gradient_f(X, y_true, y_prob, lr, m, w, b):
    """Perform one gradient-descent update of the weights and the bias.

    Parameters
    ----------
    X : numpy.ndarray
        Feature matrix; ``X.T`` is multiplied with the residual.
    y_true : numpy.ndarray
        Target values.
    y_prob : numpy.ndarray
        Current predicted probabilities, same shape as ``y_true``.
    lr : float
        Learning rate (step size).
    m : int
        Divisor for the gradients (the caller in this file passes the number
        of samples).
    w : numpy.ndarray
        Current weights.
    b : float
        Current bias.

    Returns
    -------
    tuple
        ``(w, b)`` after the step ``w - lr * dw`` and ``b - lr * db``.
    """
    # The residual drives both gradients, so compute it once.
    residual = y_prob - y_true
    dw = np.dot(X.T, residual) / m
    # Vectorised reduction over the first axis instead of the builtin sum(),
    # which iterates element by element in Python.
    db = np.sum(residual, axis=0) / m
    return w - lr * dw, b - lr * db

In [7]:
# Implement an optimizer function for logistic regression
def optimizer_f(x, y_true, iteration, lr=0.5):
    """Fit logistic regression by running a fixed number of gradient-descent steps.

    Parameters
    ----------
    x : numpy.ndarray
        Feature matrix; ``x.shape[0]`` samples and ``x.shape[1]`` features.
    y_true : numpy.ndarray
        Target values passed through to ``gradient_f``.
    iteration : int
        Number of gradient-descent steps to run.
    lr : float, optional
        Learning rate handed to ``gradient_f``. Defaults to 0.5, the value
        that was previously hard-coded.

    Returns
    -------
    tuple
        ``(w, b)``: the weights (one per feature) and the bias.
    """
    m = x.shape[0]
    # Start from the all-zero model.
    w = np.zeros(x.shape[1])
    b = 0.0
    for _ in range(iteration):
        y_prob = classifier_f(x, w, b)
        # The loss value is not needed for the update itself, so it is not
        # evaluated on every step.
        w, b = gradient_f(x, y_true, y_prob, lr, m, w, b)
    return w, b

In [8]:
def Q8():
    """Fit the from-scratch logistic regression on the breast-cancer data and report it.

    Prints (e) the fitted equation with one term per feature and (f) the
    coefficients ranked from most positive to most negative. Returns None.
    """
    # Load the dataset a single time and reuse it everywhere below, instead of
    # calling load_breast_cancer() again for every target/data/feature-name access.
    dataset = load_breast_cancer()
    feature_names = dataset.feature_names

    # (a) Set the target column as Y variable
    y = dataset.target.reshape(-1, 1)

    # (b) Set all other numeric variables as X matrix
    X = dataset.data

    # (c) Apply 0-1 normalization on both X and Y
    X = MinMaxScaler().fit_transform(X)
    # MinMaxScaler needs a 2-D input, hence the column shape above; flatten
    # back to 1-D so it lines up with the 1-D predictions during training.
    y = MinMaxScaler().fit_transform(y).reshape(-1)

    # (d) Run logistic regression by using the code written above
    w, b = optimizer_f(X, y, 10000)

    # (e) Report the final equation obtained for logistic regression
    coefs = {}
    equation = '(e) The final equation is y = %.5f' % b
    for name, weight in zip(feature_names, w):
        coefs[name] = weight
        # Negative numbers already carry their sign; only non-negative
        # coefficients need an explicit '+'.
        term = ' +%.5f * %s' if weight >= 0 else ' %.5f * %s'
        equation += term % (weight, name)
    print(equation)

    # (f) Rank coefficients from positive to negative
    sorted_coefs = sorted(coefs.items(), key=lambda item: item[1], reverse=True)
    print('\n(f) The rank of coefficients:')
    for key, value in sorted_coefs:
        print('%s: %.5f' % (key, value))
# Run the analysis; Q8 prints the equation for part (e) and the ranking for part (f).
Q8()

(e) The final equation is y = 15.52747 -1.31128 * mean radius -3.34398 * mean texture -1.39363 * mean perimeter -2.62534 * mean area -0.19934 * mean smoothness +1.32379 * mean compactness -4.57336 * mean concavity -6.28606 * mean concave points +0.08712 * mean symmetry +4.02317 * mean fractal dimension -6.32124 * radius error +0.43057 * texture error -4.55309 * perimeter error -3.89070 * area error -0.90427 * smoothness error +3.87414 * compactness error +1.21656 * concavity error +0.03104 * concave points error +1.29880 * symmetry error +2.44110 * fractal dimension error -5.19022 * worst radius -5.43374 * worst texture -4.41285 * worst perimeter -4.83622 * worst area -3.83927 * worst smoothness -0.34865 * worst compactness -3.18278 * worst concavity -5.15053 * worst concave points -4.14975 * worst symmetry -0.77157 * worst fractal dimension

(f) The rank of coefficients:
mean fractal dimension: 4.02317
compactness error: 3.87414
fractal dimension error: 2.44110
mean compactness: 1.323