In [None]:
import copy
import math

import numpy as np
%matplotlib widget
import matplotlib.pyplot as plt
from plt_one_addpt_onclick import plt_one_addpt_onclick
from lab_utils_common import draw_vthresh

# Apply the matplotlib style sheet stored at ./deeplearning.mplstyle to every figure
# created below. The path is relative, so it is resolved against the current working
# directory -- presumably the notebook's own folder (confirm before running elsewhere).
plt.style.use('./deeplearning.mplstyle')

# Sigmoid or Logistic Function
$$
\begin{gathered}
g(z) = \frac{1}{1 + e^{-z}}\\
z = \mathbf{w} \cdot \mathbf{x} + b \\
\text{由上式可知：} z \in (-\infty,+\infty)\text{，且} \lim_{z \to +\infty}g(z) = 1\text{，}\\
\lim_{z \to -\infty}g(z) = 0
\end{gathered}
$$


In [None]:
# np.exp accepts both arrays (applied element by element) and single numbers.
# Inputs: first an array, then a plain scalar.
input_array = np.array([1, 2, 3])
input_val = 1

# Exponentiate each input.
exp_array = np.exp(input_array)
exp_val = np.exp(input_val)

# Report the array case ...
print("Input to exp:", input_array)
print("Output of exp:", exp_array)

# ... followed by the scalar case.
print("Input to exp:", input_val)
print("Output of exp:", exp_val)

In [None]:
def sigmoid(z):
    """Compute the logistic sigmoid g(z) = 1 / (1 + exp(-z)) element-wise.

    The value is evaluated as exp(-log(1 + exp(-z))), with the inner
    log(1 + exp(-z)) supplied by ``np.logaddexp(0, -z)``. That routine never
    exponentiates a positive number, so very negative ``z`` no longer makes
    ``np.exp`` overflow (the naive formula emits a RuntimeWarning there).

    Args:
        z: A scalar or array-like of real numbers.

    Returns:
        The sigmoid of ``z``: a NumPy float for scalar input, otherwise an
        ndarray with the same shape as ``z``. Values lie in [0, 1].
    """
    # log(1 + e^{-z}) = logaddexp(0, -z); negating and exponentiating gives g(z).
    return np.exp(-np.logaddexp(0, np.negative(z)))

In [None]:
# Integer inputs z = -10, -9, ..., 10 (np.arange excludes the stop value 11).
z_tmp = np.arange(-10, 11)

# Evaluate the sigmoid defined above at every z.
y = sigmoid(z_tmp)

# Show z and sigmoid(z) side by side as a two-column table, 3 digits of precision.
np.set_printoptions(precision=3)
print("Input (z), Output (sigmoid(z))")
print(np.column_stack((z_tmp, y)))

In [None]:
# Draw the sigmoid curve computed above: z on the x-axis, sigmoid(z) on the y-axis.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(5, 3))
ax.plot(z_tmp, y, color="b")

# Title and axis labels in a single call.
ax.set(title="Sigmoid function", ylabel="sigmoid(z)", xlabel="z")

# Helper from lab_utils_common; presumably marks the decision threshold at z = 0 (confirm).
draw_vthresh(ax, 0)

# Logistic Regression
$$
\begin{aligned}
f_{\mathbf{w},b}(\mathbf{x}) &= g(\mathbf{w} \cdot \mathbf{x} + b)=\frac{1}{1+e^{-(\mathbf{w} \cdot \mathbf{x} + b)}}\\
&= P(y=1 \mid \mathbf{x};\mathbf{w},b)
\end{aligned}
$$

# Logistic Loss Function
$$
L(f_{\mathbf{w}, b}(\mathbf{x^{(i)}}), y^{(i)})=\left\{
		\begin{aligned}
			&-\log(f_{\mathbf{w},b}(\mathbf{x^{(i)}})) &&\text{if } y^{(i)} = 1 \\
			&-\log(1-f_{\mathbf{w},b}(\mathbf{x^{(i)}})) &&\text{if } y^{(i)} = 0
		\end{aligned}
		\right.
$$
损失函数可以简写为
$$
L(f_{\mathbf{w}, b}(\mathbf{x^{(i)}}), y^{(i)})=-y^{(i)}\log(f_{\mathbf{w},b}(\mathbf{x^{(i)}})) - (1 - y^{(i)})\log(1 - f_{\mathbf{w},b}(\mathbf{x^{(i)}}))
$$
当$y^{(i)}=1$时，上式变为
$$
    L(f_{\mathbf{w}, b}(\mathbf{x^{(i)}}), y^{(i)})=(-(1)\log(f_{\mathbf{w},b}(\mathbf{x^{(i)}}))) - (1 - 1)\log(1 - f_{\mathbf{w},b}(\mathbf{x^{(i)}}))
    = -\log(f_{\mathbf{w},b}(\mathbf{x^{(i)}}))
$$
当$y^{(i)}=0$时，上式即为
$$
    L(f_{\mathbf{w}, b}(\mathbf{x^{(i)}}), y^{(i)})=(-(0)\log(f_{\mathbf{w},b}(\mathbf{x^{(i)}}))) - (1 - 0)\log(1 - f_{\mathbf{w},b}(\mathbf{x^{(i)}})) =-\log(1-f_{\mathbf{w},b}(\mathbf{x^{(i)}}))
$$
最后，代价函数（cost function，即所有样本损失的平均值）为
$$
J(\mathbf{w}, b) = \frac{1}{m}\sum_{i=1}^{m}L(f_{\mathbf{w}, b}(\mathbf{x^{(i)}}), y^{(i)}) = \frac{1}{m}\sum_{i=1}^{m}[(-y^{(i)}\log(f_{\mathbf{w},b}(\mathbf{x^{(i)}}))) - (1 - y^{(i)})\log(1 - f_{\mathbf{w},b}(\mathbf{x^{(i)}}))]
$$


In [None]:
def compute_cost_logistic(X: np.ndarray, y: np.ndarray, w, b):
    """Compute the mean logistic (binary cross-entropy) cost over a data set.

    For each example the loss is
        -y * log(f) - (1 - y) * log(1 - f),   f = sigmoid(w . x + b).
    It is evaluated here through the equivalent identities
        -log(f)     = log(1 + exp(-z))
        -log(1 - f) = log(1 + exp(z)),         z = w . x + b,
    using ``np.logaddexp``. Taking ``np.log`` of a saturated sigmoid instead
    produces ``log(0) = -inf`` and then ``0 * -inf = nan``, so a confidently
    correct prediction would turn the whole cost into nan.

    Args:
        X: Feature matrix with one training example per row, shape (m, n).
        y: Target labels, one per row of X (m values).
        w: Weight vector with one entry per feature.
        b: Scalar bias.

    Returns:
        The average loss over the m examples as a NumPy float.

    Raises:
        ValueError: If X has no rows, or y does not hold one label per row.
    """
    X = np.asarray(X)
    m = X.shape[0]
    if m == 0:
        raise ValueError("X must contain at least one training example")

    labels = np.asarray(y, dtype=float).reshape(-1)
    if labels.shape[0] != m:
        raise ValueError("y must contain exactly one label per row of X")

    # Linear scores for all examples at once, flattened to one value per row.
    z = np.reshape(np.dot(X, w) + b, -1)

    # logaddexp(0, t) = log(1 + e^t) stays finite for any finite t.
    losses = labels * np.logaddexp(0, -z) + (1 - labels) * np.logaddexp(0, z)
    return np.sum(losses) / m

In [None]:
# Toy training set: six examples (rows) with two features each.
X_train = np.array(
    [
        [0.5, 1.5],
        [1, 1],
        [1.5, 0.5],
        [3, 0.5],
        [2, 2],
        [1, 2.5],
    ]
)  # (m,n)
# One binary label per row of X_train.
y_train = np.array([0, 0, 0, 1, 1, 1])

In [None]:
# Evaluate the cost on the toy data at a hand-picked parameter setting: w = [1, 1], b = -3.
b_tmp = -3
w_tmp = np.array([1, 1])
print(compute_cost_logistic(X_train, y_train, w=w_tmp, b=b_tmp))

# 梯度下降公式
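$$
\begin{aligned}
w_j &= w_j - \alpha\left[\frac{1}{m} \sum_{i=1}^{m} (f_{\mathbf{w},b}(\mathbf{x^{(i)}}) - y^{(i)})x_j^{(i)}\right] \\
b &= b - \alpha\left[\frac{1}{m} \sum_{i=1}^{m} (f_{\mathbf{w},b}(\mathbf{x^{(i)}}) - y^{(i)})\right]
\end{aligned}
$$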

In [None]:
def compute_gradient_logistic(X: np.ndarray, y: np.ndarray, w, b):
    """Compute the gradient of the logistic cost with respect to w and b.

    Args:
        X: Feature matrix, one training example per row, shape (m, n).
        y: Target labels, indexed by example.
        w: Weight vector with one entry per feature.
        b: Scalar bias.

    Returns:
        A tuple ``(dj_dw, dj_db)``: the per-feature gradient (length n) and the
        bias gradient, each averaged over the m examples.
    """
    num_examples, num_features = X.shape
    grad_w = np.zeros((num_features,))
    grad_b = 0

    for idx, features in enumerate(X):
        # Prediction error for this example: sigmoid(w . x + b) - y.
        residual = sigmoid(np.dot(features, w) + b) - y[idx]
        # Each feature contributes residual * x_j to its own partial derivative.
        grad_w += residual * features
        grad_b += residual

    return grad_w / num_examples, grad_b / num_examples

In [None]:
def gradient_descent(X: np.ndarray, y: np.ndarray, w_in, b_in, alpha, num_iterations):
    """Fit logistic-regression parameters with batch gradient descent.

    Args:
        X: Feature matrix, one training example per row, shape (m, n).
        y: Target labels, one per example.
        w_in: Initial weights (one per feature). Never modified; integer
            values are accepted and converted to float.
        b_in: Initial bias. Never modified.
        alpha: Learning rate.
        num_iterations: Number of gradient steps to take.

    Returns:
        A tuple ``(w, b, J_history)``: the final weights, the final bias, and
        the cost after each of the first 100000 iterations.
    """
    # Cap on recorded costs so very long runs do not grow the list without bound.
    max_history = 100000

    # Work on a float copy: the caller's array stays untouched, and an integer
    # initial vector (e.g. np.array([1, 1])) can take fractional updates. An
    # in-place `w -= ...` on an integer array raises a casting error.
    w = np.array(w_in, dtype=float)
    b = b_in
    J_history = []

    # Progress is printed roughly ten times over the run. The value is only
    # used inside the loop, so num_iterations == 0 never divides by zero.
    report_every = math.ceil(num_iterations / 10)

    for i in range(num_iterations):
        dj_dw, dj_db = compute_gradient_logistic(X, y, w, b)
        # Rebind instead of updating in place so inputs are never mutated.
        w = w - alpha * dj_dw
        b = b - alpha * dj_db

        cost = None
        if i < max_history:
            cost = compute_cost_logistic(X, y, w, b)
            J_history.append(cost)

        if i % report_every == 0:
            # Past the history cap, compute the cost on demand so the report
            # reflects the current parameters rather than a stale entry.
            if cost is None:
                cost = compute_cost_logistic(X, y, w, b)
            print(f"Iteration {i:4d}: Cost {cost}")
    return w, b, J_history

In [None]:
# Start from all-zero parameters and run gradient descent on the toy training set.
w_tmp = np.zeros_like(X_train[0])  # one weight per feature, same dtype as X_train
b_tmp = 0.
alph = 0.1      # learning rate
iters = 10000   # number of gradient steps

w_out, b_out, J_history = gradient_descent(
    X_train,
    y_train,
    w_in=w_tmp,
    b_in=b_tmp,
    alpha=alph,
    num_iterations=iters,
)
print(f"\nupdated parameters: w:{w_out}, b:{b_out}")