In [1]:
from typing import Tuple

import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler

In [2]:
data = pd.read_csv("https://raw.githubusercontent.com/changyaochen/MECE4520/master/lectures/lecture_4/breast_cancer_data.csv")
data["label"] = data["diagnosis"].apply(lambda x: 0 if x == "B" else 1)
data.head()

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave_mean,...,texture_extreme,perimeter_extreme,area_extreme,smoothness_extreme,compactness_extreme,concavity_extreme,concave_extreme,symmetry_extreme,fractal_extreme,label
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,1
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,1
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,1
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,1
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,1


## Forward propagation of a 2-layer NN

In [3]:
features = [
    "radius_mean",
    "texture_mean",
    "perimeter_mean",
    "area_mean",
    "smoothness_mean",
    "compactness_mean",
    "concavity_mean",
    "concave_mean",
    "symmetry_mean",
    "fractal_mean",
    "radius_se",
    "texture_se",
    "perimeter_se",
    "area_se",
    "smoothness_se",
    "compactness_se",
    "concavity_se",
    "concave_se",
    "symmetry_se",
    "fractal_se",
    "radius_extreme",
    "texture_extreme",
    "perimeter_extreme",
    "area_extreme",
    "smoothness_extreme",
    "compactness_extreme",
    "concavity_extreme",
    "concave_extreme",
    "symmetry_extreme",
    "fractal_extreme",
]
label = "label"

X = data[features].values
X = StandardScaler().fit_transform(X)  # standardize the X
Y = data[label].values.reshape((-1, 1))

In [4]:
# forward pass for a simple 2-layer NN, with 3 hidden units
np.random.seed(10)

def sigmoid(x):
    """Calculates sigmoid function."""
    return 1. / (1 + np.exp(-x))

# parameters for the first layer
W_1 = np.random.normal(size=(3, X.shape[1]))
print(f"Shape of W_1 is {W_1.shape}")

b_1 = np.random.normal(size=(3, 1))
print(f"Shape of b_1 is {b_1.shape}")

# parameters for the second layer
W_2 = np.random.normal(size=(1, 3))
print(f"Shape of W_2 is {W_2.shape}")

b_2 = np.random.normal(size=(1, 1))
print(f"Shape of b_1 is {b_2.shape}")

# calculate the forward propagation
Z_1 = X @ W_1.T
print(f"\nShape of Z_1 is {Z_1.shape}")
print("Samples for Z_1:")
print(Z_1[:5])

A_1 = sigmoid(Z_1 + b_1.T) 
print(f"Shape of A_1 is {A_1.shape}")
print("Samples for A_1:")
print(A_1[:5])

Z_2 = A_1 @ W_2.T
print(f"\nShape of Z_2 is {Z_2.shape}")
print("Samples for Z_2:")
print(Z_1[:5])

A_2 = Y_hat = sigmoid(Z_2 + b_2.T)
print(f"Shape of A_2 is {A_2.shape}")
print("Samples for A_2:")
print(A_2[:5])

Shape of W_1 is (3, 30)
Shape of b_1 is (3, 1)
Shape of W_2 is (1, 3)
Shape of b_1 is (1, 1)

Shape of Z_1 is (569, 3)
Samples for Z_1:
[[ 11.69461511  10.42525945 -10.74728969]
 [  0.8671148   -0.73907009   0.6649427 ]
 [  6.66350261   6.35817103  -0.92203087]
 [ 18.2359918   14.76409539 -20.30563484]
 [  1.25147542   0.60716681  -0.41933106]]
Shape of A_1 is (569, 3)
Samples for A_1:
[[9.99997246e-01 9.99976764e-01 2.02122454e-05]
 [8.78128212e-01 3.78827911e-01 6.46349369e-01]
 [9.99578475e-01 9.98645158e-01 2.72110126e-01]
 [9.99999996e-01 9.99999697e-01 1.42720640e-09]
 [9.13661430e-01 7.00925895e-01 3.81958631e-01]]

Shape of Z_2 is (569, 1)
Samples for Z_2:
[[ 11.69461511  10.42525945 -10.74728969]
 [  0.8671148   -0.73907009   0.6649427 ]
 [  6.66350261   6.35817103  -0.92203087]
 [ 18.2359918   14.76409539 -20.30563484]
 [  1.25147542   0.60716681  -0.41933106]]
Shape of A_2 is (569, 1)
Samples for A_2:
[[0.37191201]
 [0.43023923]
 [0.43172821]
 [0.371911  ]
 [0.42040167]]


In [5]:
def forward_prop(
    X: np.array,
    W_1: np.array,
    b_1: np.array,
    W_2: np.array,
    b_2: np.array,
) -> Tuple:
    """Performs the forward propagation of the given NN."""
    # Note the NN structure is passed in from outside.
    Z_1 = X @ W_1.T
    A_1 = sigmoid(Z_1 + b_1.T)
    
    Z_2 = A_1 @ W_2.T
    A_2 = Y = sigmoid(Z_2 + b_2.T)
    
    return A_2, Z_2, A_1, Z_1

Y_hat, _, _, _ = forward_prop(X=X, W_1=W_1, b_1=b_1, W_2=W_2, b_2=b_2)

In [6]:
def derivatives_by_backprop(
    X: np.array,
    Y: np.array,
    W_1: np.array,
    b_1: np.array,
    W_2: np.array,
    b_2: np.array,
) -> Tuple:
    """Calculates the derivatives of the parameters by backforward propagation.
    
    Here we assume it is a binary classification problem, with sigmoid activation functions.
    """
    # forward propagation
    dW_2, db_2, dW_1, db_1 = 0, 0, 0, 0
    Y_hat, Z_2, A_1, Z_1 = forward_prop(X=X, W_1=W_1, b_1=b_1, W_2=W_2, b_2=b_2)
    n = len(Y_hat)
    
    loss = -np.mean(np.multiply(Y, np.log(Y_hat)) + np.multiply(1 - Y, np.log(1 - Y_hat)))
    
    dZ_2 = Y_hat - Y
    dW_2 = dZ_2.T @ A_1 / n
    db_2 = np.mean(dZ_2.T, axis=1, keepdims=True) 
    
    dZ_1 = np.multiply(dZ_2 @ W_2, np.multiply(A_1, 1 - A_1))
    dW_1 = (dZ_1.T @ X) / n
    db_1 = np.mean(dZ_1.T, axis=1, keepdims=True) 
    
    return dW_2, db_2, dW_1, db_1, loss

dW_2, db_2, dW_1, db_1, loss = derivatives_by_backprop(X=X, Y=Y, W_1=W_1, b_1=b_1, W_2=W_2, b_2=b_2)    

In [7]:
def gradient_descent(
    X: np.array,
    Y: np.array,
    W_1_init: np.array,
    b_1_init: np.array,
    W_2_init: np.array,
    b_2_init: np.array,
    learning_rate: float = 0.01,
    epsilon: float = 1e-6,
):
    """Runs gradient descent to fit the NN via backprop."""
    pass