In [7]:
import numpy as np

In [10]:
def compute_loss(w, b, x, y):
    y_pred = w * x + b
    loss = 0.5 * (y - y_pred) ** 2
    return loss

def compute_gradients(w, b, x, y):
    y_pred = w * x + b
    dw = -(y - y_pred) * x
    db = -(y - y_pred)
    return dw, db

def gradient_descent(x, y, learning_rate = 0.1, num_iteration = 100):
    w = 0
    b = 0
    loss_history = []
    for i in range(num_iteration):
        dw, db = 0, 0
        total_loss = 0
        for j in range(len(x)):
            dw_i, db_i = compute_gradients(w, b, x[j], y[j])
            dw += dw_i
            db += db_i
            total_loss += compute_loss(w, b, x[j], y[j])

        dw /= len(x)
        db /= len(x)

        w += dw
        b += db

        loss_history.append(total_loss/len(x))
        if i % 10 == 0:
            print(f"Iteration {i}: Loss = {total_loss/len(x)}, w = {w}, b = {b}")
    return w, b, loss_history

In [8]:
def compute_loss(w, b, x, y):
    """ 
    计算二次损失函数
    """
    y_pred = w * x + b
    loss = 0.5 * (y - y_pred) ** 2
    return loss

def compute_gradients(w, b, x, y):
    y_pred = w * x + b
    dw = -(y - y_pred) * x
    db = -(y - y_pred)
    return dw, db

def gradient_descent(x, y, learning_rate = 0.1, num_iteration = 100):
    w = 0
    b = 0
    loss_history = []

    for i in range(num_iteration):
        dw, db = 0, 0
        total_loss = 0
        for j in range(len(x)):
            dw_i, db_i = compute_gradients(w, b, x[j], y[j])
            dw += dw_i
            db += db_i
            total_loss += compute_loss(w, b, x[j], y[j])

        # 求平均梯度
        dw /= len(x)
        db /= len(x)

        # 更新权重和偏置
        w -= learning_rate * dw
        b -= learning_rate * db

        average_loss = total_loss / len(x)
        loss_history.append(average_loss)

        if i % 10 == 0:
            print(f"Iteration {i}: Loss = {average_loss}, w = {w}, b = {b}")

    return w, b, loss_history    

In [11]:
if __name__ == "__main__":
    x_data = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    y_data = np.array([3, 5, 7, 9, 11, 13, 15, 17, 19, 21]) # y = 2x + 1
    # 执行梯度下降
    w, b, loss_history = gradient_descent(x_data, y_data, learning_rate=0.05, num_iteration=100)
    print(f"Final parameters: w = {w}, b = {b}")
    print(f"loss history {loss_history}")

Iteration 0: Loss = 88.5, w = -82.5, b = -12.0
Iteration 10: Loss = 1.1236656084313656e+34, w = -9.537953405481129e+17, b = -1.3700368811347648e+17
Iteration 20: Loss = 1.4275370918865465e+66, w = -1.075054181631394e+34, b = -1.5442137484198218e+33
Iteration 30: Loss = 1.8135841601103636e+98, w = -1.2117290201679765e+50, b = -1.7405342393656587e+49
Iteration 40: Loss = 2.304029453593078e+130, w = -1.3657797378073718e+66, b = -1.9618135387696199e+65
Iteration 50: Loss = 2.9271052536659635e+162, w = -1.5394153817877433e+82, b = -2.211224734253118e+81
Iteration 60: Loss = 3.718678662148774e+194, w = -1.735125842098952e+98, b = -2.4923443175131204e+97
Iteration 70: Loss = 4.7243162762942775e+226, w = -1.955717555857652e+114, b = -2.8092034702832174e+113
Iteration 80: Loss = 6.001907211192677e+258, w = -2.2043537508858704e+130, b = -3.166345870431578e+129
Iteration 90: Loss = 7.624995462840322e+290, w = -2.484599805575544e+146, b = -3.568892847120227e+145
Final parameters: w = -6.9507887186

  loss = 0.5 * (y - y_pred) ** 2
