## SGD的代码实现

### 传统梯度下降与随机梯度下降的区别

- 传统的梯度下降训练过程

In [None]:
# Vanilla gradient descent (illustrative): each update uses the gradient
# evaluated on the whole of `data`.
while True:
    # Gradient of `loss` with respect to W, as returned by evaluate_gradient.
    grad = evaluate_gradient(loss, data, W)
    # Step against the gradient, scaled by the learning rate alpha.
    W -= alpha * grad

- 随机梯度下降的训练过程

In [None]:
# Stochastic (mini-batch) gradient descent (illustrative): each update uses
# the gradient evaluated on a single batch instead of the whole of `data`.
while True:
    # Next batch drawn from `data`; 256 is presumably the batch size.
    minibatch = next_training_batch(data, 256)
    grad = evaluate_gradient(loss, minibatch, W)
    # Step against the gradient, scaled by the learning rate alpha.
    W -= alpha * grad

- 传统梯度下降的具体过程

In [None]:
import matplotlib.pyplot as plt
from sklearn.datasets.samples_generator import make_blobs
import numpy as np
import argparse
 
def sigmoid_activation(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``, element-wise.

    The value is computed as ``exp(-log(1 + exp(-x)))`` using
    ``np.logaddexp(0, -x)`` for the inner logarithm, so large negative inputs
    evaluate to 0.0 without overflowing ``exp``.

    Args:
        x: A scalar or NumPy array.

    Returns:
        A NumPy float (scalar input) or array of the same shape as ``x`` with
        values in the closed interval [0, 1].
    """
    return np.exp(-np.logaddexp(0, -x))

In [None]:
# Command-line options for the training run.
ap = argparse.ArgumentParser()
# The epoch count is used as a loop bound, so parse it as an integer
# (parsing it as float made the epoch indices floats, e.g. "epoch #1.0").
ap.add_argument("-e", "--epochs", type=int, default=100,
    help="# of epochs")
ap.add_argument("-a", "--alpha", type=float, default=0.01,
    help="learning rate")
# Expose the parsed options as a plain dict: args["epochs"], args["alpha"].
args = vars(ap.parse_args())

In [None]:
# Generate 250 two-dimensional samples belonging to two clusters
# (a binary classification problem).
X, y = make_blobs(
    n_samples=250,
    n_features=2,
    centers=2,
    cluster_std=1.05,
    random_state=20,
)

# Prepend a column of ones to the feature matrix, so W[0] acts as the
# constant (bias) term of the linear model.
X = np.column_stack((np.ones(X.shape[0]), X))

# Initialise one weight per column of X, drawn uniformly at random.
W = np.random.uniform(size=(X.shape[1],))

# Loss value recorded at every epoch.
lossHistory = list()

In [None]:
# Batch gradient descent: every epoch makes one weight update computed from
# the full dataset.
for epoch in np.arange(0, args["epochs"]):
    # Predictions for all samples under the current weights.
    preds = sigmoid_activation(X.dot(W))

    # Residual between the predictions and the labels.
    error = preds - y

    # Sum-of-squared-errors loss, recorded for the loss curve.
    loss = np.sum(error ** 2)
    lossHistory.append(loss)
    print("[INFO] epoch #{}, loss={:.7f}".format(epoch + 1, loss))

    # Update direction: features weighted by their residual, averaged over
    # the samples. Without this step W never changes and nothing is learned.
    gradient = X.T.dot(error) / X.shape[0]

    # Step against the gradient, scaled by the learning rate.
    W += -args["alpha"] * gradient

In [None]:
# Decision boundary: sigmoid(W0 + W1*x1 + W2*x2) = 0.5 is equivalent to
# x2 = (-W0 - W1*x1) / W2. Only the first feature column is the x-axis;
# feeding the whole design matrix would also draw spurious curves for the
# bias column and the second feature column.
x1 = X[:, 1]
Y = (-W[0] - (W[1] * x1)) / W[2]

# Scatter the samples coloured by label and overlay the boundary line.
plt.figure()
plt.scatter(X[:, 1], X[:, 2], marker="o", c=y)
plt.plot(x1, Y, "r-")

# Loss curve: one point per recorded loss value, so the x-axis always matches
# the length of lossHistory.
fig = plt.figure()
plt.plot(np.arange(0, len(lossHistory)), lossHistory)
fig.suptitle("Training Loss")
plt.xlabel("Epoch #")
plt.ylabel("Loss")
plt.show()

### 随机梯度下降过程

In [None]:
import matplotlib.pyplot as plt
from sklearn.datasets.samples_generator import make_blobs
import numpy as np
import argparse
 
def sigmoid_activation(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``, element-wise.

    The value is computed as ``exp(-log(1 + exp(-x)))`` using
    ``np.logaddexp(0, -x)`` for the inner logarithm, so large negative inputs
    evaluate to 0.0 without overflowing ``exp``.

    Args:
        x: A scalar or NumPy array.

    Returns:
        A NumPy float (scalar input) or array of the same shape as ``x`` with
        values in the closed interval [0, 1].
    """
    return np.exp(-np.logaddexp(0, -x))
 
def next_training_batch(X, y, batchSize):
    """Yield consecutive ``(X, y)`` mini-batches of at most ``batchSize`` rows.

    ``X`` and ``y`` are sliced in lockstep, starting at row 0 and advancing by
    ``batchSize``; the final batch holds whatever rows remain. The data is
    not shuffled.

    Args:
        X: Array whose first axis indexes the samples.
        y: Labels, sliced with the same row ranges as ``X``.
        batchSize: Number of rows per batch.

    Yields:
        Tuples ``(X_batch, y_batch)``.
    """
    starts = np.arange(0, X.shape[0], batchSize)
    for start in starts:
        stop = start + batchSize
        yield (X[start:stop], y[start:stop])

In [None]:
# Command-line options for the SGD training run.
ap = argparse.ArgumentParser()
# The epoch count is used as a loop bound, so parse it as an integer
# (parsing it as float made the epoch indices floats).
ap.add_argument("-e", "--epochs", type=int, default=100,
    help="# of epochs")
ap.add_argument("-a", "--alpha", type=float, default=0.01,
    help="learning rate")
ap.add_argument("-b", "--batch-size", type=int, default=32,
    help="size of SGD mini-batches")
# Expose the parsed options as a plain dict: args["epochs"], args["alpha"],
# args["batch_size"].
args = vars(ap.parse_args())

In [None]:
# Generate 400 two-dimensional samples belonging to two clusters.
X, y = make_blobs(
    n_samples=400,
    n_features=2,
    centers=2,
    cluster_std=2.5,
    random_state=95,
)

# Prepend a column of ones to the feature matrix, so W[0] acts as the
# constant (bias) term of the linear model.
X = np.column_stack((np.ones(X.shape[0]), X))

print("[INFO] starting training...")

# Initialise one weight per column of X, drawn uniformly at random.
W = np.random.uniform(size=(X.shape[1],))

# Average mini-batch loss recorded at every epoch.
lossHistory = list()

In [None]:
# Mini-batch SGD: within each epoch the weights are updated once per batch.
for epoch in np.arange(0, args["epochs"]):
    # Losses of the individual batches seen during this epoch.
    batch_losses = []

    batches = next_training_batch(X, y, args["batch_size"])
    for batchX, batchY in batches:
        # Residual between the batch predictions and the batch labels.
        residual = sigmoid_activation(batchX.dot(W)) - batchY

        # Sum-of-squared-errors loss for this batch.
        batch_losses.append(np.sum(residual ** 2))

        # Update direction: features weighted by their residual, averaged
        # over the rows of the batch, then scaled by the learning rate.
        step = args["alpha"] * (batchX.T.dot(residual) / batchX.shape[0])

        # Move the weights against the gradient.
        W -= step

    # Record the mean batch loss for this epoch.
    lossHistory.append(np.average(batch_losses))

In [None]:
# Decision boundary: sigmoid(W0 + W1*x1 + W2*x2) = 0.5 is equivalent to
# x2 = (-W0 - W1*x1) / W2. Only the first feature column is the x-axis;
# feeding the whole design matrix would also draw spurious curves for the
# bias column and the second feature column.
x1 = X[:, 1]
Y = (-W[0] - (W[1] * x1)) / W[2]

# Scatter the samples coloured by label and overlay the boundary line.
plt.figure()
plt.scatter(X[:, 1], X[:, 2], marker="o", c=y)
plt.plot(x1, Y, "r-")

# Loss curve: one point per recorded loss value, so the x-axis always matches
# the length of lossHistory.
fig = plt.figure()
plt.plot(np.arange(0, len(lossHistory)), lossHistory)
fig.suptitle("Training Loss")
plt.xlabel("Epoch #")
plt.ylabel("Loss")
plt.show()