In [26]:
import timeit
import numpy as np

# Batch gradient descent
def batch_gradient_descent(X: np.ndarray, y: np.ndarray, learning_rate=0.01, epochs=1):
    """Fit a linear model ``X @ w + b`` with full-batch gradient descent.

    Each epoch performs exactly one parameter update that uses every sample:
    the weights step along ``X.T @ (y_hat - y) / N`` and the bias along the
    mean residual.

    Args:
        X: Feature matrix of shape (N, D) -- N samples, D features.
        y: Target column of shape (N, 1).
        learning_rate: Step size applied to each update.
        epochs: Number of passes over the data (one update per pass).

    Returns:
        Tuple ``(w, b)`` with ``w`` of shape (D, 1) and ``b`` of shape (1,).
        Both are initialised from ``np.random.randn``.
    """
    n_samples, n_features = X.shape
    weights = np.random.randn(n_features, 1)  # (D, 1)
    bias = np.random.randn(1)  # (1,)

    for _ in range(epochs):
        residual = (X @ weights + bias) - y  # (N, 1)
        weight_step = learning_rate * (X.T @ residual) / n_samples  # (D, 1)
        bias_step = learning_rate * np.sum(residual) / n_samples  # scalar
        weights = weights - weight_step
        bias = bias - bias_step

    return weights, bias

# Stochastic gradient descent
def stochastic_gradient_descent(X: np.ndarray, y: np.ndarray, learning_rate=0.01, epochs=1):
    """Fit a linear model ``X @ w + b`` with per-sample gradient descent.

    Every epoch walks the samples in their stored order and applies one
    parameter update per sample, so a call performs ``epochs * N`` updates.

    Args:
        X: Feature matrix of shape (N, D) -- N samples, D features.
        y: Target column of shape (N, 1).
        learning_rate: Step size applied to each update.
        epochs: Number of passes over the data.

    Returns:
        Tuple ``(w, b)`` with ``w`` of shape (D, 1) and ``b`` of shape (1,).
        Both are initialised from ``np.random.randn``.
    """
    N, D = X.shape
    w = np.random.randn(D, 1)  # (D, 1)
    b = np.random.randn(1)  # (1,)
    for _ in range(epochs):
        for i in range(N):
            # Slice (rather than index) so the sample keeps its 2-D shape.
            X_i = X[i:i + 1]  # (1, D)
            y_i = y[i:i + 1]  # (1, 1)
            error = (X_i @ w + b) - y_i  # (1, 1)
            w = w - learning_rate * (X_i.T @ error)  # (D, 1)
            # Reduce the (1, 1) residual to a scalar before updating the bias;
            # subtracting the 2-D residual directly would broadcast b from
            # (1,) to (1, 1) and change the shape of the returned bias.
            b = b - learning_rate * np.sum(error)  # (1,)
    return w, b

# Mini-batch gradient descent
def mini_batch_gradient_descent(X: np.ndarray, y: np.ndarray, learning_rate=0.01, epochs=1, batch_size=1000):
    """Fit a linear model ``X @ w + b`` with mini-batch gradient descent.

    Every epoch walks the data in consecutive slices of at most
    ``batch_size`` samples (in stored order) and applies one update per
    slice, using the gradient averaged over the samples in that slice.

    Args:
        X: Feature matrix of shape (N, D) -- N samples, D features.
        y: Target column of shape (N, 1).
        learning_rate: Step size applied to each update.
        epochs: Number of passes over the data.
        batch_size: Maximum number of samples per update; must be >= 1.

    Returns:
        Tuple ``(w, b)`` with ``w`` of shape (D, 1) and ``b`` of shape (1,).
        Both are initialised from ``np.random.randn``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        # A negative step would make range() empty and silently skip training.
        raise ValueError("batch_size must be a positive integer")

    N, D = X.shape
    w = np.random.randn(D, 1)  # (D, 1)
    b = np.random.randn(1)  # (1,)
    for _ in range(epochs):
        for start in range(0, N, batch_size):
            X_batch = X[start:start + batch_size]  # (n_batch, D)
            y_batch = y[start:start + batch_size]  # (n_batch, 1)
            # The slice is shorter than batch_size when N < batch_size or on
            # the final ragged batch; average over the rows actually present
            # so the step is not silently shrunk.
            n_batch = X_batch.shape[0]
            error = (X_batch @ w + b) - y_batch  # (n_batch, 1)
            w = w - learning_rate * (X_batch.T @ error) / n_batch  # (D, 1)
            b = b - learning_rate * np.sum(error) / n_batch  # (1,)
    return w, b

# Synthetic dataset for the gradient descent benchmark:
# 10000 samples, 3 features, targets from y = x1 + 2*x2 + 3*x3 + 4 (no noise).
X = np.random.randn(10000, 3)
y = X @ np.array([[1], [2], [3]]) + 4

# Print the total wall-clock time of 10 default-argument runs of each
# optimizer, in the order: batch, stochastic, mini-batch.
for optimizer in (
    batch_gradient_descent,
    stochastic_gradient_descent,
    mini_batch_gradient_descent,
):
    print(timeit.timeit(lambda: optimizer(X, y), number=10))

0.004355699988082051
0.8243008999852464
0.002524400013498962
