In [9]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from tqdm import tqdm 

# 0. Notation

m: number of examples

$n_x$: number of variables(features)

$n_y$: number of output(classes) 

x.shape: (m, n_x)

y.shape: (m, n_y)

W: (n_x, 1)

b: (1,)

# 1. Forward Propagation

## 1-1. Initialize parameters
- Weights and biases are parameters that model connections between different layers
- Define parameter shapes: keep the implementation as close as possible to the mathematical calculations

$$X = 
\left[
\begin{matrix} 
x_{1}^{(1)} & x_{2}^{(1)} & \dots & x_{n}^{(1)} \\
x_{1}^{(2)} & x_{2}^{(2)} & \dots & x_{n}^{(2)} \\
\vdots & \vdots & \ddots & \vdots \\
x_{1}^{(m)} & x_{2}^{(m)} & \dots & x_{n}^{(m)} \\
\end{matrix}
\right] \in \mathbb{R}^{m \times n} $$

$$W =
\left[
\begin{matrix}
w_1 \\
w_2 \\
\vdots \\
w_n \\
\end{matrix}
\right]
\in \mathbb{R}^{n \times 1}$$

$$b \in \mathbb{R}$$

In [10]:
def initialize_weights(X):
    """Draw a random starting weight column, one weight per feature of X.

    Parameters
    ----------
    X : ndarray
        Design matrix; only ``X.shape[1]`` (the feature count) is read.

    Returns
    -------
    ndarray of shape (X.shape[1], 1)
        Samples from the standard normal distribution, taken from NumPy's
        global random state.
    """
    n_features = X.shape[1]
    return np.random.standard_normal(size=(n_features, 1))

def initialize_bias():
    """Draw a random starting bias.

    Returns
    -------
    ndarray of shape (1,)
        A single standard-normal sample from NumPy's global random state.
        It is a one-element 1-D array, so it broadcasts over every row when
        added to ``X . W``.
    """
    bias = np.random.standard_normal(size=1)
    return bias


## 1-2. Update neuron states

$$Z = X \cdot W + b$$
$$ A = \frac {1} {1 + e^{-Z}} $$

### 1-2-1. Activation function

In [11]:
def sigmoid(Z):
    """Element-wise logistic function ``1 / (1 + exp(-Z))``, computed without overflow.

    The textbook form evaluates ``exp(-Z)``, which overflows (RuntimeWarning and
    an intermediate ``inf``) for large negative ``Z``. Here the exponential is
    only ever taken of a non-positive number, so it stays within (0, 1].

    Parameters
    ----------
    Z : scalar or array_like
        Pre-activation values.

    Returns
    -------
    A : NumPy scalar or ndarray with the shape of ``Z``
        Activations between 0 and 1.
    """
    Z = np.asarray(Z)
    e = np.exp(-np.abs(Z))  # exp of a value <= 0: bounded by 1, cannot overflow
    # Z >= 0: 1 / (1 + e^-Z)      Z < 0: e^Z / (1 + e^Z)  -- same function, rearranged
    A = np.where(Z >= 0, 1 / (1 + e), e / (1 + e))
    return A[()]  # unwraps a 0-d result to a NumPy scalar; leaves real arrays as they are

### 1-2-2. Forward

In [12]:
def forward(X, W, b):
    """Run one forward pass: affine map followed by the sigmoid activation.

    Computes ``sigmoid(X . W + b)``. Following the notebook's notation, X is
    (m, n_x) and W is (n_x, 1), which makes the result an (m, 1) column of
    activations.
    """
    return sigmoid(np.dot(X, W) + b)

### 1-2-3. Predict

In [13]:
def predict(X, W, b):
    """Turn forward-pass activations into hard 0/1 class labels.

    An activation of 0.5 or more is labelled 1, anything below is labelled 0.
    The returned integer array has the same shape as the activations.
    """
    probabilities = forward(X, W, b)
    is_positive = probabilities >= 0.5
    return is_positive.astype(int)

# 2. Backward Propagation

## 2-1. Evaluate performance - Loss

$$\mathcal{L} = - \frac {1} {m} \sum \left[ y \log(A) + (1 - y) \log(1 - A) \right]$$

In [14]:
def log_loss(A, y, eps=1e-15):
    """Mean binary cross-entropy between predicted probabilities and 0/1 labels.

    Parameters
    ----------
    A : array_like
        Predicted probabilities.
    y : array_like
        True labels, one per example; ``len(y)`` is used as the example count.
        A 1-D label vector is accepted when ``A`` is an (m, 1) column.
    eps : float, optional
        ``A`` is clipped to ``[eps, 1 - eps]`` so neither logarithm sees 0.

    Returns
    -------
    float
        ``-1/m * sum(y * log(A) + (1 - y) * log(1 - A))``.
    """
    m = len(y)
    A = np.clip(A, eps, 1 - eps)
    y = np.asarray(y)
    if y.ndim == 1 and A.ndim == 2 and A.shape[1] == 1:
        # (m,) against (m, 1) would broadcast to an (m, m) matrix and sum m*m
        # terms instead of m; line the labels up with the activation column.
        y = y.reshape(-1, 1)
    return -1 / m * np.sum(y * np.log(A) + (1 - y) * np.log(1 - A))

## 2-2. Backward - gradient(derivative)

$$\frac {\partial {\mathcal{L}}} {\partial {W}} =
\frac {1} {m} X^T \cdot (A - y)$$

$$\frac {\partial {\mathcal{L}}} {\partial {b}} =
\frac {1} {m} \sum(A - y)$$

In [15]:
def gradients(X, A, y):
    """Gradients of the mean cross-entropy loss with respect to W and b.

    Parameters
    ----------
    X : ndarray
        Inputs; ``X.T`` is multiplied with the prediction error.
    A : array_like
        Activations produced by the forward pass for ``X``.
    y : array_like
        True labels, one per example; ``len(y)`` is used as the example count.
        A 1-D label vector is accepted when ``A`` is an (m, 1) column.

    Returns
    -------
    dW : ndarray
        ``1/m * X^T . (A - y)``.
    db : float
        ``1/m * sum(A - y)``.
    """
    m = len(y)
    A = np.asarray(A)
    y = np.asarray(y)
    if y.ndim == 1 and A.ndim == 2 and A.shape[1] == 1:
        # (m, 1) - (m,) would broadcast to (m, m) and give dW one column per
        # example; line the labels up with the activation column instead.
        y = y.reshape(-1, 1)
    error = A - y  # computed once and shared by both gradients
    dW = 1 / m * np.dot(X.T, error)
    db = 1 / m * np.sum(error)
    return dW, db

## 2-3. Update - Vectorized gradient descent

$$\begin{bmatrix}
w_1 \\
\\
w_2 \\
\end{bmatrix}
= \begin{bmatrix}
w_1 \\
\\
w_2 \\
\end{bmatrix}
- \alpha
\begin{bmatrix}
\frac {\partial {\mathcal{L}}} {\partial {w_1}} \\
\\
\frac {\partial {\mathcal{L}}} {\partial {w_2}} \\
\end{bmatrix}, \quad
W = \begin{bmatrix}
w_1 \\
\\
w_2 \\
\end{bmatrix}, \quad
\frac {\partial {\mathcal{L}}} {\partial {W}} =
\begin{bmatrix}
\frac {\partial {\mathcal{L}}} {\partial {w_1}} \\
\\
\frac {\partial {\mathcal{L}}} {\partial {w_2}} \\
\end{bmatrix}$$

$$b = b - \alpha
\frac {\partial {\mathcal{L}}} {\partial {b}}$$

$$ W = W - \alpha
\frac {\partial {\mathcal{L}}} {\partial {W}}$$

$$b = b - \alpha
\frac {\partial {\mathcal{L}}} {\partial {b}}$$

In [16]:
def update(dW, db, W, b, lr):
    """Take one gradient-descent step on the weights and the bias.

    Each parameter moves against its gradient, scaled by the learning rate
    ``lr``. The inputs are not modified; the stepped values are returned as
    a ``(W, b)`` pair.
    """
    return W - lr * dW, b - lr * db

# 3. Neural Network - 1 layer perceptron

In [42]:
def perceptron(x_train, y_train, x_test=None, y_test=None, lr=0.01, epochs=1000, log_every=10):
    """Train a single-layer perceptron with full-batch gradient descent.

    Parameters are drawn at random, then for ``epochs`` iterations the model
    runs a forward pass on the training set, computes gradients and updates
    W and b. Every ``log_every`` epochs the loss and accuracy are recorded
    (for the test set too, when one is given), and the recorded curves are
    plotted once training ends.

    Parameters
    ----------
    x_train : ndarray
        Training inputs, one row per example.
    y_train : array_like
        Training labels. A 1-D vector is reshaped to a column.
    x_test, y_test : optional
        Held-out inputs and labels. Test metrics are tracked and plotted only
        when both are provided.
    lr : float
        Learning rate passed to ``update``.
    epochs : int
        Number of gradient-descent iterations.
    log_every : int
        Interval, in epochs, between recorded metric points.

    Returns
    -------
    W, b
        The trained weights and bias.

    Raises
    ------
    ValueError
        If ``log_every`` is smaller than 1.
    """
    if log_every < 1:
        raise ValueError("log_every must be a positive integer")

    # forward() yields an (m, 1) column of activations. A 1-D label vector would
    # broadcast against it into an (m, m) matrix inside log_loss / gradients,
    # so labels are normalised to a column up front.
    y_train = np.asarray(y_train)
    if y_train.ndim == 1:
        y_train = y_train.reshape(-1, 1)

    has_test = x_test is not None and y_test is not None
    if has_test:
        y_test = np.asarray(y_test)
        if y_test.ndim == 1:
            y_test = y_test.reshape(-1, 1)

    W = initialize_weights(x_train)
    b = initialize_bias()

    logged_epochs = []
    train_loss, train_acc = [], []
    test_loss, test_acc = [], []

    for epoch in tqdm(range(epochs)):
        A = forward(x_train, W, b)

        if epoch % log_every == 0:
            logged_epochs.append(epoch)

            # train loss and accuracy, measured before this epoch's update
            train_loss.append(log_loss(A, y_train))
            train_acc.append(accuracy_score(y_train, predict(x_train, W, b)))

            if has_test:
                # test loss and accuracy with the same parameters
                A_test = forward(x_test, W, b)
                test_loss.append(log_loss(A_test, y_test))
                test_acc.append(accuracy_score(y_test, predict(x_test, W, b)))

        # update
        dW, db = gradients(x_train, A, y_train)
        W, b = update(dW, db, W, b, lr)

    fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(12, 4))
    ax_loss.plot(logged_epochs, train_loss, label="train loss")
    ax_acc.plot(logged_epochs, train_acc, label="train_acc")
    if has_test:
        # Only draw test curves when they hold data; an empty series would
        # still add a legend entry with nothing behind it.
        ax_loss.plot(logged_epochs, test_loss, label="test loss")
        ax_acc.plot(logged_epochs, test_acc, label="test_acc")
    ax_loss.set_xlabel("epoch")
    ax_loss.set_ylabel("log loss")
    ax_loss.legend()
    ax_acc.set_xlabel("epoch")
    ax_acc.set_ylabel("accuracy")
    ax_acc.legend()
    plt.show()

    return W, b


### Learning Curves

In [None]:
def plot_learning_curves(X, y, W, b, train_loss, train_acc, grid_range=(-1.5, 1.5), resolution=100):
    """Plot training loss, training accuracy and the 2-D decision boundary.

    Parameters
    ----------
    X : ndarray
        Dataset inputs; columns 0 and 1 are used as the scatter coordinates.
    y : array_like
        Labels used to colour the scatter points.
    W, b
        Trained parameters, passed to ``predict`` for every grid point.
    train_loss, train_acc : sequence
        Recorded training curves to draw in the first two panels.
    grid_range : (float, float)
        Lower and upper bound of the square region, on both axes, over which
        the decision boundary is evaluated.
    resolution : int
        Number of grid samples per axis.
    """
    # Generate input data for the decision boundary plot
    lo, hi = grid_range
    x1_range = np.linspace(lo, hi, resolution)
    x2_range = np.linspace(lo, hi, resolution)
    x1_grid, x2_grid = np.meshgrid(x1_range, x2_range)

    # Each grid point must be a ROW (x1, x2) so it can be multiplied by W;
    # stacking them as columns does not align with the weight matrix.
    # All points are stacked into one (resolution**2, 2) matrix and classified
    # in a single call, then folded back into the grid layout.
    grid_points = np.column_stack([x1_grid.ravel(), x2_grid.ravel()])
    z_grid = predict(grid_points, W, b).reshape(x1_grid.shape)

    fig, (ax_loss, ax_acc, ax_boundary) = plt.subplots(1, 3, figsize=(16, 4))
    ax_loss.plot(train_loss, label="train loss")
    ax_loss.legend()
    ax_acc.plot(train_acc, label="train_acc")
    ax_acc.legend()

    # Plotting decision boundary
    regions = ax_boundary.contourf(x1_grid, x2_grid, z_grid, levels=50, cmap='viridis', alpha=0.7)
    fig.colorbar(regions, ax=ax_boundary)

    # Plot the dataset points; labels are flattened so a column of labels
    # is read as one colour value per point.
    ax_boundary.scatter(X[:, 0], X[:, 1], c=np.ravel(y), cmap="summer", edgecolor='k')
    ax_boundary.set_title("Decision Boundary")
    ax_boundary.set_xlabel('x1')
    ax_boundary.set_ylabel('x2')
    plt.show()