In [None]:
import numpy as np

# Batch gradient descent
def batch_gradient_descent(X, y, learning_rate=0.01, epochs=1000):
    """Fit a linear model ``y ~ X @ w + b`` with full-batch gradient descent.

    Every epoch performs one update using the gradient of the half mean
    squared error computed over all N samples.

    Args:
        X: Array-like of shape (N, D) -- N samples with D features each.
        y: Array-like holding N targets, shaped (N, 1) or (N,).
        learning_rate: Step size applied to each gradient update.
        epochs: Number of full passes (and therefore updates) to run.

    Returns:
        A tuple ``(w, b)`` where ``w`` has shape (D, 1) and ``b`` has
        shape (1,).

    Raises:
        ValueError: If ``X`` is not 2-D or ``y`` does not contain exactly
            N values.
    """
    X = np.asarray(X)
    N, D = X.shape
    # Force the targets into a column. A 1-D y of shape (N,) would otherwise
    # broadcast against the (N, 1) predictions into an (N, N) "error" matrix
    # and silently produce a meaningless (D, N) weight array.
    y = np.asarray(y).reshape(N, 1)

    # Parameters start from standard-normal draws of the global NumPy RNG.
    w = np.random.randn(D, 1)  # (D, 1)
    b = np.random.randn(1)  # (1,)

    for _ in range(epochs):
        error = X @ w + b - y  # (N, 1) residuals
        w = w - learning_rate * (X.T @ error) / N  # (D, 1)
        b = b - learning_rate * np.sum(error) / N  # (1,)
    return w, b

# Stochastic gradient descent
def stochastic_gradient_descent(X, y, learning_rate=0.01, epochs=1000):
    """Fit a linear model ``y ~ X @ w + b`` with per-sample gradient descent.

    Each epoch visits the samples one at a time, in the order they are
    stored (no shuffling), and updates the parameters after every sample
    using the gradient of that sample's half squared error.

    Args:
        X: Array-like of shape (N, D) -- N samples with D features each.
        y: Array-like holding N targets, shaped (N, 1) or (N,).
        learning_rate: Step size applied to each gradient update.
        epochs: Number of full passes over the data.

    Returns:
        A tuple ``(w, b)`` where ``w`` has shape (D, 1) and ``b`` has
        shape (1,).

    Raises:
        ValueError: If ``X`` is not 2-D or ``y`` does not contain exactly
            N values.
    """
    X = np.asarray(X)
    N, D = X.shape
    y = np.asarray(y).reshape(N, 1)  # one (1,)-shaped target per row

    # Parameters start from standard-normal draws of the global NumPy RNG.
    w = np.random.randn(D, 1)  # (D, 1)
    b = np.random.randn(1)  # (1,)

    for _ in range(epochs):
        for x_i, y_i in zip(X, y):
            error = x_i @ w + b - y_i  # (1,) residual for this sample
            # x_i is 1-D, so ``x_i.T @ error`` is a (D,) @ (1,) matmul that
            # fails for D > 1. Build the (D, 1) gradient by broadcasting the
            # feature column against the scalar residual instead.
            w = w - learning_rate * (x_i[:, np.newaxis] * error)  # (D, 1)
            b = b - learning_rate * error  # (1,)
    return w, b

# Mini-batch gradient descent
def mini_batch_gradient_descent(X, y, learning_rate=0.01, epochs=1000, batch_size=32):
    """Fit a linear model ``y ~ X @ w + b`` with mini-batch gradient descent.

    Each epoch walks through the data in consecutive slices of at most
    ``batch_size`` rows (in stored order, no shuffling) and updates the
    parameters after every slice using the gradient of the half mean
    squared error over that slice.

    Args:
        X: Array-like of shape (N, D) -- N samples with D features each.
        y: Array-like holding N targets, shaped (N, 1) or (N,).
        learning_rate: Step size applied to each gradient update.
        epochs: Number of full passes over the data.
        batch_size: Maximum number of rows per update; must be >= 1.

    Returns:
        A tuple ``(w, b)`` where ``w`` has shape (D, 1) and ``b`` has
        shape (1,).

    Raises:
        ValueError: If ``batch_size`` is smaller than 1, ``X`` is not 2-D,
            or ``y`` does not contain exactly N values.
    """
    if batch_size < 1:
        # A negative step would make the batch loop empty and silently
        # return the untrained random parameters.
        raise ValueError("batch_size must be a positive integer")

    X = np.asarray(X)
    N, D = X.shape
    # Column-shaped targets keep ``y_hat - y_batch`` from broadcasting into a
    # (B, B) matrix when y is passed as a flat (N,) vector.
    y = np.asarray(y).reshape(N, 1)

    # Parameters start from standard-normal draws of the global NumPy RNG.
    w = np.random.randn(D, 1)  # (D, 1)
    b = np.random.randn(1)  # (1,)

    for _ in range(epochs):
        for start in range(0, N, batch_size):
            X_batch = X[start:start + batch_size]  # (B, D), B <= batch_size
            y_batch = y[start:start + batch_size]  # (B, 1)
            # Average over the rows actually present: the final slice (or the
            # only slice when batch_size > N) can be shorter than batch_size.
            rows = X_batch.shape[0]
            error = X_batch @ w + b - y_batch  # (B, 1)
            w = w - learning_rate * (X_batch.T @ error) / rows  # (D, 1)
            b = b - learning_rate * np.sum(error) / rows  # (1,)
    return w, b