In [None]:
from typing import Optional, Tuple

import numpy as np
import pandas as pd

from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
# Pull the dataset straight from the course repository.
data = pd.read_csv(
    "https://raw.githubusercontent.com/changyaochen/MECE4520/master/data/breast_cancer.csv"
)
# Binary target: 0 where the diagnosis is "B", 1 for every other value.
data["label"] = (data["diagnosis"] != "B").astype("int64")
data.head()

## Forward propagation of a 2-layer NN

In [None]:
features = [
    "radius_mean",
    "texture_mean",
    "perimeter_mean",
    "area_mean",
    "smoothness_mean",
    "compactness_mean",
    "concavity_mean",
    "concave_mean",
    "symmetry_mean",
    "fractal_mean",
]
label = "label"

# Hold out 20% of the rows for testing; the fixed random_state keeps the split reproducible.
X_raw, X_raw_test, Y, Y_test = train_test_split(
    data[features].values,
    data[label].values,
    test_size=0.2,
    random_state=42,
)

# Fit the scaler on the training rows only, then apply the same transform to both splits.
scaler = StandardScaler().fit(X_raw)
X, X_test = scaler.transform(X_raw), scaler.transform(X_raw_test)

# Turn the 1-D label vectors into (n, 1) columns.
Y, Y_test = Y.reshape(-1, 1), Y_test.reshape(-1, 1)

In [None]:
# forward pass for a simple 2-layer NN, with 3 hidden units
# Seed NumPy's global RNG so the random parameters drawn later in this cell are reproducible.
np.random.seed(10)

def sigmoid(x):
    """Calculates the sigmoid (logistic) function element-wise.

    Uses a numerically stable formulation: the exponential is only ever taken
    of a non-positive number, so large-magnitude inputs do not overflow (the
    naive ``1 / (1 + exp(-x))`` emits an overflow RuntimeWarning for very
    negative ``x``).

    Args:
        x: A scalar or array-like of real numbers.

    Returns:
        ``1 / (1 + exp(-x))`` with the same shape as ``x`` (a NumPy scalar when
        ``x`` is a scalar).
    """
    x = np.asarray(x)
    # exp(-|x|) always lies in [0, 1], so it can never overflow.
    decay = np.exp(-np.abs(x))
    # For x >= 0: 1 / (1 + e^-x).  For x < 0: e^x / (1 + e^x).  Same value, safe form.
    result = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Unwrap 0-d arrays so scalar input yields a scalar, as before.
    return result[()] if result.ndim == 0 else result

# parameters for the first layer: 3 hidden units, one weight per input feature
W_1 = np.random.normal(size=(3, X.shape[1]))
print(f"Shape of W_1 is {W_1.shape}")

b_1 = np.random.normal(size=(3, 1))
print(f"Shape of b_1 is {b_1.shape}")

# parameters for the second layer: a single output unit fed by the 3 hidden units
W_2 = np.random.normal(size=(1, 3))
print(f"Shape of W_2 is {W_2.shape}")

b_2 = np.random.normal(size=(1, 1))
# Fixed: this line used to label the shape of b_2 as "b_1".
print(f"Shape of b_2 is {b_2.shape}")

# calculate the forward propagation
# Note: Z_1 / Z_2 hold only the weighted sums; the biases are added inside the sigmoid calls.
Z_1 = X @ W_1.T
print(f"\nShape of Z_1 is {Z_1.shape}")
print("Samples for Z_1:")
print(Z_1[:5])

A_1 = sigmoid(Z_1 + b_1.T)
print(f"Shape of A_1 is {A_1.shape}")
print("Samples for A_1:")
print(A_1[:5])

Z_2 = A_1 @ W_2.T
print(f"\nShape of Z_2 is {Z_2.shape}")
print("Samples for Z_2:")
# Fixed: this used to print Z_1 under the Z_2 heading.
print(Z_2[:5])

A_2 = Y_hat = sigmoid(Z_2 + b_2.T)
print(f"Shape of A_2 is {A_2.shape}")
print("Samples for A_2:")
print(A_2[:5])

## Training a NN with backward propagation

In [None]:
def forward_prop(
    X: np.ndarray,
    W_1: np.ndarray,
    b_1: np.ndarray,
    W_2: np.ndarray,
    b_2: np.ndarray,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Performs the forward propagation of the given 2-layer NN.

    The network structure is not fixed here; it is implied by the shapes of
    the parameters passed in from outside.

    Args:
        X: Input matrix, one sample per row.
        W_1: First-layer weights; multiplied as ``X @ W_1.T``.
        b_1: First-layer biases; added (transposed) to the first weighted sum.
        W_2: Second-layer weights; multiplied as ``A_1 @ W_2.T``.
        b_2: Second-layer bias; added (transposed) to the second weighted sum.

    Returns:
        ``(A_2, Z_2, A_1, Z_1)`` where ``A_2`` is the network output, ``A_1``
        the hidden activations, and ``Z_1`` / ``Z_2`` the weighted sums of each
        layer *before* the bias is added.
    """
    Z_1 = X @ W_1.T
    A_1 = sigmoid(Z_1 + b_1.T)

    Z_2 = A_1 @ W_2.T
    # The former extra alias ``Y`` was never read and wrongly suggested the
    # true labels, so it is dropped.
    A_2 = sigmoid(Z_2 + b_2.T)

    return A_2, Z_2, A_1, Z_1

# Run the function once on the training inputs with the current parameters;
# only the predictions are kept.
Y_hat, _, _, _ = forward_prop(
    X=X, W_1=W_1, b_1=b_1, W_2=W_2, b_2=b_2
)

In [None]:
def derivatives_by_backprop(
    X: np.ndarray,
    Y: np.ndarray,
    W_1: np.ndarray,
    b_1: np.ndarray,
    W_2: np.ndarray,
    b_2: np.ndarray,
) -> Tuple:
    """Calculates the derivatives of the parameters by backward propagation.

    Here we assume it is a binary classification problem, with sigmoid
    activation functions and the mean binary cross-entropy as the loss.

    Args:
        X: Input matrix, one sample per row.
        Y: True labels; reshaped to match the prediction column, so both
            ``(n,)`` and ``(n, 1)`` inputs are accepted.
        W_1, b_1: First-layer weights and biases.
        W_2, b_2: Second-layer weights and bias.

    Returns:
        ``(dW_2, db_2, dW_1, db_1, loss)``: the gradients of the loss with
        respect to each parameter (same shapes as the parameters) and the
        scalar loss at the given parameters.

    Raises:
        ValueError: If ``Y`` cannot be reshaped to the shape of the predictions.
    """
    # forward propagation (the weighted sums are not needed for the gradients)
    Y_hat, _, A_1, _ = forward_prop(X=X, W_1=W_1, b_1=b_1, W_2=W_2, b_2=b_2)
    n = len(Y_hat)

    # A 1-D Y would broadcast against the (n, 1) predictions into an (n, n)
    # matrix and silently corrupt both the loss and the gradients.
    Y = np.asarray(Y).reshape(Y_hat.shape)

    # Clip only for the loss so that saturated predictions (exactly 0 or 1)
    # do not produce log(0) = -inf and a NaN loss.
    tiny = 1e-15
    Y_hat_safe = np.clip(Y_hat, tiny, 1 - tiny)
    loss = -np.mean(Y * np.log(Y_hat_safe) + (1 - Y) * np.log(1 - Y_hat_safe))

    # output layer: for sigmoid + cross-entropy, dL/dZ_2 simplifies to (Y_hat - Y)
    dZ_2 = Y_hat - Y
    dW_2 = dZ_2.T @ A_1 / n
    db_2 = np.mean(dZ_2.T, axis=1, keepdims=True)

    # hidden layer: chain rule through W_2 and the sigmoid derivative A_1 * (1 - A_1)
    dZ_1 = (dZ_2 @ W_2) * A_1 * (1 - A_1)
    dW_1 = (dZ_1.T @ X) / n
    db_1 = np.mean(dZ_1.T, axis=1, keepdims=True)

    return dW_2, db_2, dW_1, db_1, loss

# Evaluate the gradients and the loss once at the current parameter values.
dW_2, db_2, dW_1, db_1, loss = derivatives_by_backprop(
    X=X, Y=Y, W_1=W_1, b_1=b_1, W_2=W_2, b_2=b_2
)

In [None]:
def gradient_descent(
    X: np.ndarray,
    Y: np.ndarray,
    W_1_init: np.ndarray,
    b_1_init: np.ndarray,
    W_2_init: np.ndarray,
    b_2_init: np.ndarray,
    learning_rate: float = 0.01,
    epsilon: float = 1e-6,
    verbose: bool = False,
    max_iterations: Optional[int] = None,
) -> Tuple:
    """Runs gradient descent to fit the NN via backprop.

    Iterates until the change in loss between two consecutive iterations is
    no larger than ``epsilon`` in absolute value, or until ``max_iterations``
    updates have been made.

    Args:
        X: Training inputs, one sample per row.
        Y: Training labels.
        W_1_init, b_1_init: Starting values of the first-layer parameters.
        W_2_init, b_2_init: Starting values of the second-layer parameters.
        learning_rate: Step size applied to every gradient.
        epsilon: Convergence threshold on the absolute change in loss.
        verbose: If True, prints the loss and the training ROC AUC every
            10 iterations.
        max_iterations: Optional cap on the number of updates; ``None`` (the
            default) keeps the original behaviour of looping until convergence.

    Returns:
        ``(W_1, b_1, W_2, b_2, losses)``: the fitted parameters and the list
        of losses, whose first entry is an ``inf`` sentinel followed by the
        loss measured at the start of each iteration.
    """
    # Work on private float copies: the updates below are in place (``-=``),
    # and without copying they would overwrite the caller's initial arrays.
    W_1 = np.array(W_1_init, dtype=float)
    b_1 = np.array(b_1_init, dtype=float)
    W_2 = np.array(W_2_init, dtype=float)
    b_2 = np.array(b_2_init, dtype=float)

    losses = [float("inf")]
    diff_in_loss = float("inf")
    iteration = 0
    while abs(diff_in_loss) > epsilon:
        if max_iterations is not None and iteration >= max_iterations:
            break
        iteration += 1

        dW_2, db_2, dW_1, db_1, loss = derivatives_by_backprop(
            X=X, Y=Y, W_1=W_1, b_1=b_1, W_2=W_2, b_2=b_2
        )

        W_1 -= learning_rate * dW_1
        b_1 -= learning_rate * db_1
        W_2 -= learning_rate * dW_2
        b_2 -= learning_rate * db_2

        losses.append(loss)
        diff_in_loss = losses[-1] - losses[-2]

        # The AUC is only ever reported, never returned, so the extra forward
        # pass is run only on the iterations that actually print it.
        if verbose and iteration % 10 == 0:
            Y_hat, _, _, _ = forward_prop(X=X, W_1=W_1, b_1=b_1, W_2=W_2, b_2=b_2)
            roc_auc = roc_auc_score(y_true=Y, y_score=Y_hat)
            print(loss, roc_auc)
    return W_1, b_1, W_2, b_2, losses

In [None]:
# Reproducible starting point for training.
np.random.seed(42)

# The four draws consume the seeded stream sequentially, so their order must not change.
W_1_init = np.random.normal(size=(3, X.shape[1]))  # first layer weights
b_1_init = np.random.normal(size=(3, 1))  # first layer biases
W_2_init = np.random.normal(size=(1, 3))  # second layer weights
b_2_init = np.random.normal(size=(1, 1))  # second layer bias

# Fit the network, printing progress along the way.
W_1, b_1, W_2, b_2, losses = gradient_descent(
    X,
    Y,
    W_1_init,
    b_1_init,
    W_2_init,
    b_2_init,
    learning_rate=0.1,
    epsilon=1e-3,
    verbose=True,
)

In [None]:
# Score the held-out test set with the parameters returned by gradient descent.
Y_test_hat, _, _, _ = forward_prop(
    X=X_test, W_1=W_1, b_1=b_1, W_2=W_2, b_2=b_2
)
roc_auc_score(y_true=Y_test, y_score=Y_test_hat)

In [None]:
# train a NN with Keras
from tensorflow import keras
from tensorflow.keras import layers

def keras_model(nn_size: int, num_features: int, num_layers: int):
    """Builds and compiles a small fully connected Keras classifier.

    Args:
        nn_size: Number of units in each hidden layer.
        num_features: Length of each input vector.
        num_layers: Number of hidden layers stacked before the output layer.

    Returns:
        A compiled ``keras.Model`` with sigmoid hidden layers and a single
        sigmoid output unit, using the Adam optimizer, binary cross-entropy
        loss, and AUC as the tracked metric.
    """
    inputs = keras.Input(shape=(num_features,), name="inputs")

    hidden = inputs
    for layer_idx in range(num_layers):
        # NOTE(review): the "desnse" spelling is part of the layer names; it is
        # kept verbatim so the names the model exposes do not change.
        dense = layers.Dense(
            nn_size, activation="sigmoid", name=f"desnse_layer_{layer_idx}"
        )
        hidden = dense(hidden)

    outputs = layers.Dense(1, activation="sigmoid", name="output")(hidden)

    model = keras.Model(inputs=inputs, outputs=outputs, name="simple_model")
    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["AUC"])
    return model

# Same architecture as the hand-written network: one hidden layer with 3 units.
model = keras_model(nn_size=3, num_features=X.shape[1], num_layers=1)

# Training options, gathered in one place; the test split is used for validation.
fit_options = dict(
    batch_size=32,
    epochs=20,
    validation_data=(X_test, Y_test),
    verbose=1,
    shuffle=True,
)
history = model.fit(x=X, y=Y, **fit_options)

In [None]:
# Score the Keras model on the test set with the same metric as the hand-written network.
keras_test_scores = model.predict(X_test)
roc_auc_score(y_true=Y_test, y_score=keras_test_scores)