In [340]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import copy

In [354]:
# Get the dataset
df = pd.read_csv('data.csv').values
# Remove patient ID column
df = np.delete(df, 0, 1)

X = df[:, 1:].T.astype(float)
print(X.shape)
Y = df[:, 0].T.reshape(1, len(df))

# Binary representation of diagnosis 0 for benign 1 for malignant
Y = np.where(Y == 'M', 1, 0).astype(int)
print(Y.shape)


def normalize(X):
    # Data normalization (minmax)
    vmin = np.min(X, axis=1, keepdims=True)
    vmax = np.max(X, axis=1, keepdims=True)

    return (X - vmin) / (vmax - vmin)

def denormalize(X):
    vmin = np.min(X, axis=1, keepdims=True)
    vmax = np.max(X, axis=1, keepdims=True)

    return X * (vmax - vmin) + vmin

X = normalize(X)
print(X)

(30, 568)
(1, 568)
[[0.64314449 0.60149557 0.2100904  ... 0.45525108 0.64456434 0.03686876]
 [0.27257355 0.3902604  0.36083869 ... 0.62123774 0.66351031 0.50152181]
 [0.61578329 0.59574321 0.23350149 ... 0.44578813 0.66553797 0.02853984]
 ...
 [0.63917526 0.83505155 0.88487973 ... 0.48728522 0.91065292 0.        ]
 [0.23358959 0.40370589 1.         ... 0.12872068 0.49714173 0.25744136]
 [0.22287813 0.21343303 0.77371114 ... 0.1519087  0.45231536 0.10068215]]


In [342]:
# Constants defining the model
n_x = X.shape[0]
n_h = 7
n_y = 1
learning_rate = .0075
n_iterations = 3000

In [343]:
def initialiaze_parameters(n_x, n_h, n_y):
    """
    Argument:
    n_x -- size of the input layer
    n_h -- size of the hidden layer
    n_y -- size of the output layer

    Returns:
    params -- python dictionary containing the parameters:
                    W1 -- weight matrix of shape (n_h, n_x)
                    b1 -- bias vector of shape (n_h, 1)
                    W2 -- weight matrix of shape (n_y, n_h)
                    b2 -- bias vector of shape (n_y, 1)
    """
    W1 = np.random.randn(n_h, n_x) * 0.01
    b1= np.random.randn(n_h, 1) * 0.01
    W2 = np.random.randn(n_y, n_h) * 0.01
    b2 = np.random.randn(n_y, 1) * 0.01

    return {
        "W1": W1,
        "b1": b1,
        "W2": W2,
        "b2": b2
    }

def initialize_parameters_deep(layer_dims):
    """
    Arguments:
    layer_dims -- python array (list) containing the dimensions of each layer in our network

    Returns -- python dic containing W1, b1...
    Wl -- weight matrix
    bl -- bias vector
    """

    parameters = {}
    
    for l in range(1, len(layer_dims)):
        parameters['W' + str(l)] = np.random.randn(layer_dims[l], layer_dims[l-1]) / np.sqrt(layer_dims[l-1])
        parameters['b' + str(l)] = np.zeros((layer_dims[l], 1))

    return parameters;

In [344]:
def sigmoid(z):
    """
    compute de sigmoid of z

    Arguments:
    z -- A scalar of np array of any size

    Return:
    sigmoid(z)
    """
    A = 1 / (1 + np.exp(-z))
    cache = z

    return A, cache
# assert(np.allclose(sigmoid(np.array([0, 2])), np.array([0, 2]), np.array([0.5, 0.88079708]), atol=1e-7))

def relu(z):
    """
    compute de relu of z

    Arguments:
    z -- A scalar of np array of any size

    Return:
    relu(z)
    """
    A = np.maximum(0, z)
    cache = z

    return A, cache

assert(relu(np.array([2, 10, 20])) == np.array([2, 10, 20])).all()

In [345]:
def linear_forward(A, W, b):
    """
    Linear forward propagation

    Arguments:
    A -- activation from previous layer 
    W -- weight matrix, np.array().shape((size of current layer, size of previous layer))
    b -- bias vector, np.array().shape((size of current layer, 1))
    """
    Z = W.dot(A) + b
    cache = (A, W, b)

    return Z, cache

def linear_activation_forward(A_prev, W, b, activation):
    if (activation == 'sigmoid'):
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = sigmoid(Z)

    elif (activation == 'relu'):
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = relu(Z)

    cache = (linear_cache, activation_cache)

    return A, cache

def multilayers_model_forward(X, parameters):
    caches = []
    A = X
    L = len(parameters) // 2

    for l in range(1, L):
        A_prev = A 
        A, cache = linear_activation_forward(A_prev, parameters['W' + str(l)], parameters['b' + str(l)], "relu")
        caches.append(cache)

    AL, cache = linear_activation_forward(A, parameters['W' + str(L)], parameters['b' + str(L)], 'sigmoid')
    caches.append(cache)

    return AL, caches

In [346]:
def compute_cost(AL, Y):
    m = Y.shape[1]

    cost = -1 / m * np.sum(Y * np.log(AL) + (1 - Y) * np.log(1 - AL))

    return np.squeeze(cost);

In [347]:
def linear_backward(dZ, cache):
    A_prev, W, b = cache
    m = A_prev.shape[1]

    dW = (1 / m) * dZ.dot(A_prev.T)
    db = (1 / m) * np.sum(dZ, axis=1, keepdims=True)
    dA_prev = np.dot(W.T, dZ)

    return dA_prev, dW, db

def linear_activation_backward(dA, cache, activation):
    linear_cache, activation_cache = cache

    if activation == "relu":
        dZ = np.array(dA, copy=True)
        dZ[activation_cache <= 0] = 0
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    elif (activation == 'sigmoid'):
        s = 1 / (1 + np.exp(-activation_cache))
        dZ = dA * s * (1 - s)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    

    return dA_prev, dW, db

def multilayer_model_backward(AL, Y, caches):
    """
    Initializing backpropagation
    """
    grads = {}
    L = len(caches)

    # derivative of cost
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    curr_cache = caches[L - 1]
    grads["dA" + str(L - 1)], grads["dW" + str(L)], grads["db" + str(L)] = linear_activation_backward(dAL, curr_cache, activation='sigmoid')

    for l in reversed(range(L - 1)):
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads["dA" + str(l + 1)], current_cache, activation = "relu")
        grads["dA" + str(l)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp

    return grads

In [348]:
def update_parameters(params, grads, learning_rate):
    parameters = copy.deepcopy(params)
    L = len(parameters) // 2

    for l in range(L):
        parameters["W" + str(l + 1)] = parameters["W" + str(l + 1)] - learning_rate * grads["dW" + str(l + 1)]
        parameters["b" + str(l + 1)] = parameters["b" + str(l + 1)] - learning_rate * grads["db" + str(l + 1)]

    return parameters

In [367]:
def multilayer_model(X, Y, layers_dims, learning_rate = .000001, n_iterations = 2000):
    """
    Multilayer neural networks

    Arguments:
    X -- input data, of shape (n_x, number of examples)
    Y -- label vector containing (1 if Malignant cancer, 0 either), of shape (1, number of examples)
    """
    parameters = initialize_parameters_deep(layers_dims)
    
    # navigate through each layers
    for i in range(1, n_iterations): 
        AL, caches = multilayers_model_forward(X, parameters)
        
        cost = compute_cost(AL, Y)
        if i % 100 == 0:
            print('iteration: ' + str(i) + ' cost: ' + str(cost))

        gradients = multilayer_model_backward(AL, Y, caches)

        parameters = update_parameters(parameters, gradients, learning_rate)

    return parameters, cost

In [368]:
layers_dims = [30, 20, 10, 1]
parameters, cost = multilayer_model(X, Y, layers_dims)

iteration: 100 cost: 0.6885041601713202
iteration: 200 cost: 0.6857152217003234
iteration: 300 cost: 0.6830873511061698
iteration: 400 cost: 0.6806048193871257
iteration: 500 cost: 0.6782413298491818
iteration: 600 cost: 0.6759752432500118
iteration: 700 cost: 0.6737911732748414
iteration: 800 cost: 0.6716688463792811
iteration: 900 cost: 0.6695995088680067
iteration: 1000 cost: 0.6675716335343103
iteration: 1100 cost: 0.6655740469902559
iteration: 1200 cost: 0.6636295571060808
iteration: 1300 cost: 0.6617224049290351
iteration: 1400 cost: 0.6598288459167453
iteration: 1500 cost: 0.6579291394511261
iteration: 1600 cost: 0.6560073295531217
iteration: 1700 cost: 0.6540678623531186
iteration: 1800 cost: 0.6520017534862997
iteration: 1900 cost: 0.64980924340053


In [361]:
# def predict(X, parameters):
#     m = X.shape[1]
#     p = np.zeros((1, m))

#     probas, _ = multilayers_model_forward(X, parameters)

#     print(X.shape);
#     for i in range(0, probas.shape[1]):
#         print(probas[0, i])
#         p[0, i] = 1 if probas[0, i] > 0.5 else 0

#     return p
def predict(X, parameters):
    m = X.shape[1]
    p = np.zeros((1, m))

    probas, _ = multilayers_model_forward(X, parameters)
    
    p = (probas > 0.5).astype(int)  # Simplified prediction logic

    return p


# res = predict(X, parameters)
# right = 0;
# for l in range(0, X.shape[1]):
#     if (df[l][0] == 'M' if 1 else 0) == X[0, l]:
#         right += 1
# print(f"errors={right / X.shape[1]}")

single_example = np.array([[14.27,22.55,93.77,629.8,0.1038,0.1154,0.1463,0.06139,0.1926,0.05982,0.2027,1.851,1.895,18.54,0.006113,0.02583,0.04645,0.01276,0.01451,0.003756,15.29,34.27,104.3,728.3,0.138,0.2733,0.4234,0.1362,0.2698,0.08351],
[11.69,24.44,76.37,406.4,0.1236,0.1552,0.04515,0.04531,0.2131,0.07405,0.2957,1.978,2.158,20.95,0.01288,0.03495,0.01865,0.01766,0.0156,0.005824,12.98,32.19,86.12,487.7,0.1768,0.3251,0.1395,0.1308,0.2803,0.0997]])
ex_X = normalize(single_example).T 

# Predict
prediction = predict(ex_X, parameters)
print(prediction)

[[0 0]]
