In [1]:
from sklearn import datasets
import numpy as np

from typing import *
import copy

In [2]:
breast_cancer = datasets.load_breast_cancer()

X = breast_cancer.data
y = breast_cancer.target


mean = np.mean(X, axis=0)
std = np.std(X, axis=0)

X = (X - mean) / std

print("Shape of X:", X.shape)
print("Shape of y:", y.shape)

Shape of X: (569, 30)
Shape of y: (569,)


In [3]:
#splitting into test and train
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.2, random_state=55)
print("Shape of X_train:", X_train.shape)
print("Shape of X_test:", X_test.shape)
print("Shape of Y_train:", Y_train.shape)
print("Shape of Y_test:", Y_test.shape)

Shape of X_train: (455, 30)
Shape of X_test: (114, 30)
Shape of Y_train: (455,)
Shape of Y_test: (114,)


In [4]:
X_train = X_train.T 
X_test = X_test.T

Y_train = Y_train.reshape(1, -1)
Y_test = Y_test.reshape(1,-1)

In [5]:
print("Shape of X_train:", X_train.shape)
print("Shape of X_test:", X_test.shape)
print("Shape of Y_train:", Y_train.shape)
print("Shape of Y_test:", Y_test.shape)

Shape of X_train: (30, 455)
Shape of X_test: (30, 114)
Shape of Y_train: (1, 455)
Shape of Y_test: (1, 114)


In [6]:
def initialize_parameters_deep(layer_dims: List[int]) -> Dict[str, np.ndarray]:
    
    np.random.seed(5)
    parameters: Dict[str, np.ndarray] = {}
    L = len(layer_dims) #number of layers in the network

    for l in range(1, L):
        #initializing parameters
        parameters['W' + str(l)] = np.random.randn(layer_dims[l], layer_dims[l - 1]) * 0.01
        parameters['b' + str(l)] = np.zeros((layer_dims[l], 1))
        
        assert(parameters['W' + str(l)].shape == (layer_dims[l], layer_dims[l - 1]))
        assert(parameters['b' + str(l)].shape == (layer_dims[l], 1))

    return parameters


In [7]:
import numpy as np
from typing import Tuple

def linear_forward(A: np.ndarray, W: np.ndarray, b: np.ndarray) -> Tuple[np.ndarray, Tuple[np.ndarray, np.ndarray, np.ndarray]]:
    Z = np.dot(W, A) + b #A is the activations from the previous layer
    cache = (A, W, b)
    
    return Z, cache

In [8]:
def sigmoid(Z: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    A = 1 / (1 + np.exp(-Z))
    return A, Z

def relu(Z: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    A = np.maximum(0, Z)
    return A, Z

In [9]:
def linear_activation_forward(A_prev: np.ndarray, W: np.ndarray, b: np.ndarray, activation: str) -> Tuple[np.ndarray, Tuple[Tuple[np.ndarray, np.ndarray, np.ndarray], np.ndarray]]:

    if activation == "sigmoid":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = sigmoid(Z)
    
    elif activation == "relu":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = relu(Z)
    
    cache = (linear_cache, activation_cache)

    return A, cache

In [10]:
def L_model_forward(X: np.ndarray, parameters: Dict[str, np.ndarray]) -> Tuple[np.ndarray, List[Tuple[Tuple[np.ndarray, np.ndarray, np.ndarray], np.ndarray]]]:
    #AL: activation value from the last layer
    #caches: every cache of linear_activation_forward() (L caches indexed from 0 to L-1)
    caches = []
    A = X
    L = len(parameters) // 2  #number of layers in the neural network

    #[LINEAR -> RELU]*(L-1). adding "cache" to the "caches" list.
    for l in range(1, L):
        A_prev = A 
        A, cache = linear_activation_forward(A_prev, parameters['W' + str(l)], parameters['b' + str(l)], activation="relu")
        caches.append(cache)
    
    #LINEAR -> SIGMOID. adding "cache" to the "caches" list.
    AL, cache = linear_activation_forward(A, parameters['W' + str(L)], parameters['b' + str(L)], activation="sigmoid")
    caches.append(cache)
          
    return AL, caches

In [11]:
def compute_cost(AL: np.ndarray, Y: np.ndarray) -> Union[float, np.ndarray]:
    m = Y.shape[1]

    cost = -1/m * np.sum(Y * np.log(AL) + (1 - Y) * np.log(1 - AL))
    
    cost = np.squeeze(cost)  

    return cost


\[
\begin{align*}
dW^{[l]} &= \frac{\partial \mathcal{J}}{\partial W^{[l]}} = \frac{1}{m} dZ^{[l]} A^{[l-1]^T} \\
db^{[l]} &= \frac{\partial \mathcal{J}}{\partial b^{[l]}} = \frac{1}{m} \sum_{i=1}^{m} dZ^{[l](i)} \\
dA^{[l-1]} &= \frac{\partial \mathcal{L}}{\partial A^{[l-1]}} = W^{[l]^T} dZ^{[l]}
\end{align*}
\]
\begin{align}
dZ^{[l]} = dA^{[l]} \odot g'(Z^{[l]}).
\end{align}



In [12]:
def linear_backward(dZ: np.ndarray, cache: Tuple[np.ndarray, np.ndarray, np.ndarray]) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:

    A_prev, W, b = cache
    m = A_prev.shape[1]

    #computing the gradients
    dW = (1/m) * np.dot(dZ, A_prev.T)
    db = (1/m) * np.sum(dZ, axis=1, keepdims=True)
    dA_prev = np.dot(W.T, dZ)
    
    return dA_prev, dW, db


In [13]:
def sigmoid_backward(dA: np.ndarray, activation_cache: np.ndarray) -> np.ndarray:
    Z = activation_cache
    s = 1 / (1 + np.exp(-Z))
    dZ = dA * s * (1 - s)
    return dZ

def relu_backward(dA: np.ndarray, activation_cache: np.ndarray) -> np.ndarray:
    Z = activation_cache
    dZ = np.array(dA, copy=True) 
    #when z <= 0 -> dz is 0 
    dZ[Z <= 0] = 0
    return dZ

In [14]:
def linear_activation_backward(dA: np.ndarray, cache: Tuple[Tuple[np.ndarray, np.ndarray, np.ndarray], np.ndarray], activation: str) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    
    linear_cache, activation_cache = cache
    
    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
    else:
        raise ValueError("Unsupported activation function: {}".format(activation))
    
    dA_prev, dW, db = linear_backward(dZ, linear_cache)
    
    return dA_prev, dW, db

In [15]:
def L_model_backward(AL: np.ndarray, Y: np.ndarray, caches: List[Tuple[Tuple[np.ndarray, np.ndarray, np.ndarray], np.ndarray]]) -> Dict[str, np.ndarray]:
    
    #AL: probability vector, output of the forward propagation (L_model_forward())
    #caches: linear_activation_forward() with "relu" (it's caches[l], for l in range(L-1) i.e l = 0...L-2)
        # : the cache of linear_activation_forward() with "sigmoid" (it's caches[L-1])
    grads = {}
    L = len(caches) #the number of layers
    m = AL.shape[1]
    Y = Y.reshape(AL.shape) 
    
    #initializing backprop
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL)) 
    
    #Lth layer (SIGMOID -> LINEAR) gradients. Inputs: "dAL, current_cache". Outputs: "grads["dA" + str(L-1)], grads["dW" + str(L)], grads["db" + str(L)]
    current_cache = caches[-1]
    dA_prev_temp, dW_temp, db_temp = linear_activation_backward(dAL, current_cache, activation="sigmoid")
    grads["dA" + str(L-1)] = dA_prev_temp
    grads["dW" + str(L)] = dW_temp
    grads["db" + str(L)] = db_temp
    
    #looping from l=L-2 to l=0
    for l in reversed(range(L-1)):
        #lth layer: (RELU -> LINEAR) gradients.
        #Inputs: "grads["dA" + str(l + 1)], current_cache". Outputs: "grads["dA" + str(l)], grads["dW" + str(l + 1)], grads["db" + str(l + 1)] 
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads["dA" + str(l + 1)], current_cache, activation="relu")
        grads["dA" + str(l)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp

    return grads

In [16]:
def update_parameters(params: Dict[str, np.ndarray], grads: Dict[str, np.ndarray], learning_rate: float) -> Dict[str, np.ndarray]:
    parameters = copy.deepcopy(params)
    L = len(parameters) // 2  

    for l in range(L):
        parameters["W" + str(l+1)] -= learning_rate * grads["dW" + str(l+1)]
        parameters["b" + str(l+1)] -= learning_rate * grads["db" + str(l+1)]
    
    return parameters


In [17]:
def model(X: np.ndarray, Y: np.ndarray, layers_dims: List[int], learning_rate: float = 0.01, num_iterations: int = 10000, print_cost: bool = False) -> Dict[str, np.ndarray]:
    np.random.seed(1)
    costs = []
    parameters = initialize_parameters_deep(layers_dims)
    for i in range(0, num_iterations):
        AL, caches = L_model_forward(X, parameters)
        cost = compute_cost(AL, Y)
        grads = L_model_backward(AL, Y, caches)
        parameters = update_parameters(parameters, grads, learning_rate)
        if print_cost and i % 1000 == 0:
            print(f"Cost after iteration {i}: {cost}")
            costs.append(cost)
    return parameters

In [18]:
layers_dims = [30, 120, 120, 512, 60, 30, 1]

In [19]:
parameters = model(X_train, Y_train, layers_dims, num_iterations=10000, print_cost=True)

Cost after iteration 0: 0.693147152941962
Cost after iteration 1000: 0.6527453316270071
Cost after iteration 2000: 0.6524031307248154
Cost after iteration 3000: 0.6523997764838727
Cost after iteration 4000: 0.6523997255392932
Cost after iteration 5000: 0.6523997102755271
Cost after iteration 6000: 0.65239969759289
Cost after iteration 7000: 0.652399686555513
Cost after iteration 8000: 0.652399676842074
Cost after iteration 9000: 0.6523996682035341


In [20]:
def predict(X: np.ndarray, parameters: Dict[str, np.ndarray]) -> np.ndarray:
    AL, _ = L_model_forward(X, parameters)
    predictions = (AL > 0.5).astype(int)
    return predictions

In [21]:
def evaluate(X: np.ndarray, Y: np.ndarray, parameters: Dict[str, np.ndarray]) -> float:
    predictions = predict(X, parameters)
    accuracy = np.mean(predictions == Y)
    return accuracy

In [22]:
predictions = predict(X_test, parameters)
print(f"Predictions: {predictions}")
print(f"Ground Truth: {Y_test}")
    
accuracy = evaluate(X_test, Y_test, parameters)
print(f"Accuracy: {accuracy}")

Predictions: [[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
  1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
  1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
  1 1 1 1 1 1]]
Ground Truth: [[0 1 1 0 1 0 1 0 0 0 0 0 1 1 1 0 1 0 0 0 1 1 1 0 0 1 1 0 0 1 1 1 0 0 0 1
  1 1 1 1 1 0 1 0 1 0 0 1 1 1 0 0 1 1 1 0 1 1 0 1 1 0 0 1 0 0 1 0 0 0 0 1
  1 0 1 0 0 1 1 0 1 1 1 1 0 0 1 0 0 1 0 1 1 1 1 0 1 1 1 1 1 1 1 0 1 1 1 0
  1 0 0 1 1 1]]
Accuracy: 0.5701754385964912
