In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_svmlight_file

In [2]:
# Training split: element 0 of the loader's result is converted with
# .todense(), element 1 is used as the label vector.
data = load_svmlight_file("a9a.txt")
X_train, y_train = data[0].todense(), data[1]
# Test split, handled the same way (`data` is rebound to the test tuple).
data = load_svmlight_file("a9a.t")
X_test, y_test = data[0].todense(), data[1]

In [3]:
# The original note says the test split has one attribute fewer than the
# training split. Instead of hard-coding "one", pad X_test with as many
# all-zero columns as are actually missing (possibly none), so the two
# matrices always end up with the same width.
n_missing = X_train.shape[1] - X_test.shape[1]
if n_missing < 0:
    raise ValueError("X_test has more feature columns than X_train")
X_test = np.c_[X_test, np.zeros((X_test.shape[0], n_missing))]

# Append a constant column of ones to both splits (bias term).
# NOTE: this cell rebinds X_train / X_test, so re-running it appends yet
# another column of ones to each.
X_train = np.c_[X_train, np.ones(X_train.shape[0])]
X_test = np.c_[X_test, np.ones(X_test.shape[0])]

In [4]:
# Problem size, taken from the (already bias-augmented) training matrix.
dimension = X_train.shape[1]               # number of columns of X_train
batch_size = X_train.shape[0] // 325       # mini-batch size

# Optimisation settings.
iter = 1000                                # number of iterations
C = 1                                      # weight of the hinge-loss term
regular_param = 1                          # regularisation parameter

In [5]:
# Alternative: all-zero initialisation
# w = np.zeros(dimension)

# Seed NumPy's global generator right before drawing the initial weights so
# that every execution of this cell (and every Restart & Run All) starts the
# optimisers from the same point.
np.random.seed(0)
w = np.random.normal(size = (dimension))      # normally distributed initial weights
G = np.zeros(dimension)                       # gradient buffer

In [6]:
def get_batch(X_train, y_train):
    """Draw a random mini-batch of rows from the training data.

    All row indices are shuffled with NumPy's global random generator and the
    first `batch_size` of them are used. The batch is copied into freshly
    allocated float arrays whose sizes come from the module-level
    `batch_size` and `dimension`.

    Returns:
        (X_batch, y_batch) with shapes (batch_size, dimension) and
        (batch_size,).
    """
    order = np.arange(X_train.shape[0])
    np.random.shuffle(order)
    # Fancy indexing raises IndexError if batch_size exceeds the row count.
    picked = order[np.arange(batch_size)]

    X_batch = np.zeros((batch_size, dimension))
    y_batch = np.zeros(batch_size)
    X_batch[:] = X_train[picked, :]
    y_batch[:] = y_train[picked]
    return X_batch, y_batch

In [7]:
def gradient (X, y, w):
    """(Sub)gradient of the soft-margin SVM objective evaluated by `loss`.

    The objective is  0.5 * ||w||^2 + C * mean_i max(0, 1 - y_i * x_i . w),
    so the gradient is  w - (C / n) * sum of y_i * x_i  over the rows whose
    margin y_i * x_i . w is below 1. The hinge part is averaged over the n
    rows of X, matching the averaging in `loss`; summing it instead would
    scale the data term by n relative to the reported objective.

    Args:
        X: 2-D array (or np.matrix) of samples, one per row.
        y: label per row.
        w: weight vector; any shape holding X.shape[1] values.

    Returns:
        The gradient, broadcast-compatible with `w`. When X is an np.matrix
        the result is expected to come back as a 1 x d matrix (NumPy keeps
        the matrix type through `np.dot`).

    Reads the module-level constant `C`.
    """
    n_samples = X.shape[0]
    w_col = np.asarray(w, dtype=float).reshape(-1, 1)
    labels = np.asarray(y, dtype=float).ravel()

    # Work on plain ndarrays here so that `*` is element-wise even if X is an
    # np.matrix.
    margins = labels * np.asarray(X, dtype=float).dot(w_col).ravel()
    violated = margins < 1        # rows on the sloped part of the hinge

    # -sum_i y_i * x_i over the violating rows, as one vector-matrix product.
    hinge_grad = -np.dot(labels[violated], X[violated])

    return w + C * hinge_grad / max(n_samples, 1)

In [8]:
def loss (X, y, w):
    """Soft-margin SVM objective: 0.5 * ||w||^2 + C * mean hinge loss.

    The hinge loss of a row is max(0, 1 - y_i * x_i . w); it is averaged over
    the rows of X.

    Args:
        X: 2-D array (or np.matrix) of samples, one per row.
        y: label per row.
        w: weight vector; any shape holding X.shape[1] values.

    Returns:
        The objective as a plain Python float, so it can be stored directly
        in a pre-allocated history array.

    Reads the module-level constant `C`.
    """
    n_samples = X.shape[0]
    w_col = np.asarray(w, dtype=float).reshape(-1, 1)
    labels = np.asarray(y, dtype=float).ravel()

    # Plain ndarrays keep `*` element-wise even if X is an np.matrix.
    margins = labels * np.asarray(X, dtype=float).dot(w_col).ravel()
    hinge_total = np.maximum(0.0, 1.0 - margins).sum()

    regulariser = 0.5 * np.sum(w_col * w_col)
    return float(regulariser + C * hinge_total / n_samples)

In [9]:
def NAG(w, v, g, eta=0.05, gama=0.9):
    """Apply one momentum update.

    Args:
        w: current weights.
        v: current velocity.
        g: gradient to apply (the caller decides where it is evaluated).
        eta: learning rate.
        gama: momentum coefficient.

    Returns:
        (new_w, new_v) where new_v = gama * v + eta * g and
        new_w = w - new_v. The inputs are not modified.
    """
    new_v = gama * v + eta * g
    return w - new_v, new_v

In [10]:
def RMSProp(w, G, g, gama=0.9, eta=0.001, epsilon=1e-8):
    """Apply one RMSProp-style update.

    Args:
        w: current weights.
        G: running average of the squared gradient norm (start with 0).
        g: gradient; a 1-D vector or a row/column-shaped 2-D array.
        gama: decay rate of the running average.
        eta: learning rate.
        epsilon: small constant added under the square root.

    Returns:
        (new_w, new_G); new_G is a scalar.

    NOTE(review): the accumulator is the squared L2 norm of the whole
    gradient (one scalar), not a per-coordinate average as in the usual
    RMSProp formulation - confirm this is intended.
    """
    # Sum of squares on a plain ndarray: independent of whether g is 1-D,
    # a row vector or an np.matrix (where `*` would mean matrix product).
    g_values = np.asarray(g, dtype=float)
    squared_norm = float(np.sum(g_values * g_values))

    G = gama * G + (1 - gama) * squared_norm
    w = w - (eta / np.sqrt(G + epsilon)) * g
    return w, G

In [11]:
def AdaDelta(w, G, g, delta, gama=0.95, epsilon=1e-6):
    """Apply one AdaDelta-style update.

    Args:
        w: current weights.
        G: running average of the squared gradient norm (start with 0).
        g: gradient; a 1-D vector or a row/column-shaped 2-D array.
        delta: running average of the squared update norm (start with 0).
        gama: decay rate of both running averages.
        epsilon: small constant added under the square roots.

    Returns:
        (new_w, new_G, new_delta); new_G and new_delta are scalars.

    NOTE(review): both accumulators are squared L2 norms of whole vectors
    (scalars), not per-coordinate averages as in the usual AdaDelta
    formulation - confirm this is intended.
    """
    # Sums of squares are taken on plain ndarrays so the result does not
    # depend on g being 1-D, row-shaped or an np.matrix.
    g_values = np.asarray(g, dtype=float)
    G = gama * G + (1 - gama) * float(np.sum(g_values * g_values))

    delta_w = -(np.sqrt(delta + epsilon) / np.sqrt(G + epsilon)) * g
    w = w + delta_w

    step_values = np.asarray(delta_w, dtype=float)
    delta = gama * delta + (1 - gama) * float(np.sum(step_values * step_values))
    return w, G, delta

In [12]:
def Adam(w, m, G, g, t, beta=0.9, gama=0.999, eta=0.002, epsilon=1e-8):
    """Apply one Adam-style update with bias-corrected step size.

    Args:
        w: current weights.
        m: running average of the gradient (first moment, start with zeros).
        G: running average of the squared gradient norm (start with 0).
        g: gradient; a 1-D vector or a row/column-shaped 2-D array.
        t: number of updates performed so far (start with 0).
        beta: decay rate of the first moment.
        gama: decay rate of the second moment.
        eta: learning rate.
        epsilon: small constant added under the square root.

    Returns:
        (new_w, new_m, new_G, t + 1).

    The step counter is advanced before the bias correction is computed.
    Using the incoming t=0 would give sqrt(1 - gama**0) == 0 and therefore a
    zero-length first step. The step size follows the standard Adam form
    eta * sqrt(1 - gama**t) / (1 - beta**t).

    NOTE(review): the second moment is the squared L2 norm of the whole
    gradient (one scalar), not a per-coordinate average as in the usual
    Adam formulation - confirm this is intended.
    """
    t = t + 1

    m = beta * m + (1 - beta) * g

    # Sum of squares on a plain ndarray: works for 1-D, row-shaped and
    # np.matrix gradients alike.
    g_values = np.asarray(g, dtype=float)
    G = gama * G + (1 - gama) * float(np.sum(g_values * g_values))

    a = eta * np.sqrt(1 - np.power(gama, t)) / (1 - np.power(beta, t))
    w = w - (a / np.sqrt(G + epsilon)) * m
    return w, m, G, t

In [14]:
def NAG_run(X_train, y_train, X_test, y_test, w):
    """Optimise with Nesterov accelerated gradient and track the test loss.

    Runs `iter` iterations (module-level setting). In each one the loss on
    (X_test, y_test) is recorded first, then the training gradient is
    evaluated at the look-ahead point w - momentum * velocity and passed to
    `NAG`, which is called with its default coefficients.

    Returns:
        Array of length `iter` with the test loss before each update.
    """
    momentum = 0.9                       # same value as NAG's default `gama`
    velocity = np.zeros(dimension)
    history = np.zeros(iter)             # test loss per iteration
    for step in range(iter):
        history[step] = loss(X_test, y_test, w)
        lookahead = w - momentum * velocity
        grad = gradient(X_train, y_train, lookahead)
        w, velocity = NAG(w, velocity, grad)
    return history

In [15]:
def RMSProp_run(X_train, y_train, X_test, y_test, w):
    """Optimise with `RMSProp` and track the test loss.

    Runs `iter` iterations (module-level setting). In each one the loss on
    (X_test, y_test) is recorded first, then one `RMSProp` update is applied
    using the gradient on the training data.

    Returns:
        Array of length `iter` with the test loss before each update.
    """
    accumulator = 0                      # RMSProp state, starts at zero
    history = np.zeros(iter)             # RMSProp test loss per iteration
    for step in range(iter):
        history[step] = loss(X_test, y_test, w)
        grad = gradient(X_train, y_train, w)
        w, accumulator = RMSProp(w, accumulator, grad)
    return history

In [16]:
def AdaDelta_run(X_train, y_train, X_test, y_test, w):
    """Optimise with `AdaDelta` and track the test loss.

    Runs `iter` iterations (module-level setting). In each one the loss on
    (X_test, y_test) is recorded first, then one `AdaDelta` update is applied
    using the gradient on the training data.

    Returns:
        Array of length `iter` with the test loss before each update.
    """
    grad_accumulator = 0                 # AdaDelta gradient state
    step_accumulator = 0                 # AdaDelta update state
    history = np.zeros(iter)             # test loss per iteration
    for step in range(iter):
        history[step] = loss(X_test, y_test, w)
        grad = gradient(X_train, y_train, w)
        w, grad_accumulator, step_accumulator = AdaDelta(
            w, grad_accumulator, grad, step_accumulator
        )
    return history

In [None]:
def Adam_run(X_train, y_train, X_test, y_test, w):
    """Optimise with `Adam` and track the test loss.

    Runs `iter` iterations (module-level setting). In each one the loss on
    (X_test, y_test) is recorded first, then one `Adam` update is applied
    using the gradient on the training data. The step counter handed to
    `Adam` starts at 0 and is replaced by the value `Adam` returns.

    Returns:
        Array of length `iter` with the test loss before each update.
    """
    first_moment = np.zeros(dimension)
    second_moment = 0
    step_count = 0
    history = np.zeros(iter)             # test loss per iteration
    for step in range(iter):
        history[step] = loss(X_test, y_test, w)
        grad = gradient(X_train, y_train, w)
        w, first_moment, second_moment, step_count = Adam(
            w, first_moment, second_moment, grad, step_count
        )
    return history

In [None]:
# Run every optimiser from the same initial weights `w`; each call returns
# its per-iteration test-loss history.
NAG_loss = NAG_run(
    X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test, w=w
)
RMSProp_loss = RMSProp_run(
    X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test, w=w
)
AdaDelta_loss = AdaDelta_run(
    X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test, w=w
)
Adam_loss = Adam_run(
    X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test, w=w
)

In [None]:
# Loss curves: test loss of each optimiser against the iteration index.
x = np.arange(iter)

fig, ax = plt.subplots()

ax.plot(x, NAG_loss.tolist(), label='NAG')
ax.plot(x, RMSProp_loss.tolist(), label='RMSProp')
ax.plot(x, AdaDelta_loss.tolist(), label='AdaDelta')
ax.plot(x, Adam_loss.tolist(), label='Adam')

ax.set_xlabel('iteration')
ax.set_ylabel('loss')
ax.set_title('Iteration-Loss')
ax.legend()

plt.show()