In [1]:
%matplotlib inline
import numpy as np
from matplotlib import pyplot as plt

In [2]:
dataset = np.array([
    [1, 7],
    [2, 13],
    [3, 17],
    [4, 22],
    [5, 27],
    [6, 33],
    [7, 38],
    [8, 42],
    [9, 46],
    [10, 52]
])

In [3]:
x = dataset[:, 0:1]
y = dataset[:, 1:]

## 批量梯度下降算法

In [4]:
def BatchGradientDescent(x, y, step=0.001, iter_count=500):
    length, features = x.shape
    data = np.column_stack((x, np.ones((length, 1))))
    w = np.zeros((features + 1, 1))
    for i in range(iter_count):
        new_w = w.copy()
        for feature in range(features + 1):
            new_w[feature] = np.sum((np.dot(data, w) - y) * data[:, feature]) / length
        w -= step * new_w        
    return w

In [5]:
BatchGradientDescent(x, y, iter_count=500)

array([[5.2272],
       [0.9504]])

In [6]:
def BatchGradientDescent(x, y, step=0.001, iter_count=500):
    length, features = x.shape
    data = np.column_stack((x, np.ones((length, 1))))
    w = np.zeros((features + 1, 1))
    for i in range(iter_count):        
        new_w = np.sum((np.dot(data, w) - y) * data, axis=0).reshape((features + 1, 1)) / length        
        w -= step * new_w
    return w

In [7]:
BatchGradientDescent(x, y, iter_count=500)

array([[5.17049008],
       [0.92448297]])

## 小批量梯度下降算法

In [8]:
def MiniBatchGradientDescent(x, y, step=0.001, iter_count=500, batch_size=4):
    length, features = x.shape
    data = np.column_stack((x, np.ones((length, 1))))
    np.random.shuffle(data)
    w = np.zeros((features + 1, 1))
    start, end = 0, batch_size
    for i in range(iter_count):        
        new_w = w.copy()
        for feature in range(features + 1):
            new_w[feature] = np.sum((np.dot(data[start:end], w) - y[start:end]) * data[start:end, feature]) / length
        w -= step * new_w
        start = (start + batch_size) % length
        end = (end + batch_size) % length
    return w

In [9]:
MiniBatchGradientDescent(x, y, iter_count=2000)

array([[3.14974219],
       [7.21219878]])

In [10]:
def MiniBatchGradientDescent(x, y, step=0.001, iter_count=500, batch_size=4):
    length, features = x.shape
    data = np.column_stack((x, np.ones((length, 1))))
    w = np.zeros((features + 1, 1))
    start, end = 0, batch_size
    for i in range(iter_count): 
        new_w = np.sum((np.dot(data[start:end], w) - y[start:end]) * data[start:end], axis=0).reshape((features + 1, 1)) / length
        w -= step * new_w
        start = (start + batch_size) % length
        end = (end + batch_size) % length
    return w

In [11]:
MiniBatchGradientDescent(x, y, iter_count=500)

array([[5.00225841],
       [0.95765833]])

## 随机梯度下降算法

In [12]:
def StochasticGradientDescent(x, y, step=0.001, iter_count=500):
    length, features = x.shape
    data = np.column_stack((x, np.ones((length, 1))))
    w = np.zeros((features + 1, 1))
#     random_ind = 0
    for i in range(iter_count):
        random_ind = np.random.randint(length)
#         random_ind = (random_ind + 1) % length
        new_w = w.copy()
        for feature in range(features + 1):                        
            new_w[feature] = (np.dot(data[random_ind:random_ind + 1], w) - y[random_ind]) * data[random_ind, feature] / length
        w -= step * new_w        
    return w

In [13]:
StochasticGradientDescent(x, y, iter_count=1000)

array([[5.09409174],
       [0.77844559]])

In [14]:
def StochasticGradientDescent(x, y, step=0.001, iter_count=500):
    length, features = x.shape
    data = np.column_stack((x, np.ones((length, 1))))
    w = np.zeros((features + 1, 1))
    for i in range(iter_count):
        random_ind = np.random.randint(length)           
        new_w = (np.dot(data[random_ind:random_ind + 1], w) - y[random_ind]) * data[random_ind] / length
        w -= step * new_w.reshape((features + 1, 1))
    return w

In [15]:
StochasticGradientDescent(x, y, iter_count=1000)

array([[5.08159849],
       [0.76944698]])