In [1]:
# Consider f(x) = x^2 + 5*sin(x), whose derivative is f'(x) = 2x + 5*cos(x). Gradient descent update rule: x_(t+1) = x_t - eta * (2*x_t + 5*cos(x_t)), where eta is the learning rate
import numpy as np

In [2]:
# Compute the derivative f'(x)
def grad(x):
    """Return f'(x) = 2x + 5*cos(x), the derivative of f(x) = x^2 + 5*sin(x)."""
    linear_term = 2 * x
    cosine_term = 5 * np.cos(x)
    return linear_term + cosine_term

# Compute f(x)
def cost(x):
    """Return the objective value f(x) = x^2 + 5*sin(x)."""
    quadratic_term = x ** 2
    sine_term = 5 * np.sin(x)
    return quadratic_term + sine_term

def myGD1(x0, theta, max_iter=1000, tol=1e-3):
    """Run one-dimensional gradient descent using the module-level ``grad``.

    Args:
        x0: Starting point.
        theta: Learning rate (step size) multiplying the derivative.
        max_iter: Maximum number of update steps to attempt.
        tol: Stop as soon as ``abs(grad(x_new))`` falls below this value.

    Returns:
        A tuple ``(x, it)``. ``x`` is the list of stored iterates, starting
        with ``x0``. ``it`` is the zero-based index of the step at which the
        loop stopped (``max_iter - 1`` when the tolerance was never met, and
        0 when ``max_iter`` is 0).

    Note:
        The candidate that first satisfies the tolerance is not appended, so
        ``x[-1]`` is the last iterate before the stopping test passed.
    """
    x = [x0]
    it = 0  # keeps the return value defined when max_iter == 0
    # The derivative evaluated for the stopping test is exactly the one the
    # next step needs, so carry it forward instead of recomputing it.
    g = grad(x[-1])
    for it in range(max_iter):
        x_new = x[-1] - theta * g
        g = grad(x_new)
        if abs(g) < tol:
            break
        x.append(x_new)
    return (x, it)

# Descend from two different starting points with the same learning rate.
x1, it1 = myGD1(x0=-5, theta=.1)
x2, it2 = myGD1(x0=5, theta=.1)
print(x1, it1)
print(x2, it2)

[-5, -4.141831092731613, -3.0434140487394945, -1.9371390635788721, -1.370609623535342, -1.1959138533062952, -1.1398126662660861, -1.1207324901805855, -1.1140974995041208, -1.1117718342401366, -1.1109543623859697, -1.1106667365268623] 11
[5, 3.8581689072683867, 3.463564567930569, 3.2451582916682646, 3.0934475688734215, 2.9741786797296776, 2.8723524342019475, 2.7798685851337033, 2.691538912182054, 2.6034429924417726, 2.512083663118539, 2.413825273788166, 2.3043909242955314, 2.178284700900974, 2.028031263811057, 1.8431593967550366, 1.6090315913519224, 1.3063382475764564, 0.9143774850440367, 0.42636006838025575, -0.11415049832376245, -0.5880663503407273, -0.8864605464168874, -1.0252476809653677, -1.079641732011138, -1.0995355411928174, -1.1066334337506414, -1.1091439570842945, -1.1100292207856688, -1.1103410483948122] 29


In [12]:
# Multivariate gradient descent
# Generate 1000 data points around the line y = 4 + 3x
# Fit a reference solution with scikit-learn
from sklearn.linear_model import LinearRegression
# Seed the global NumPy RNG so the generated data are reproducible on re-run.
np.random.seed(0)
X = np.random.rand(1000, 1)
# NOTE: np.random.rand draws uniform noise, here scaled to [0, 0.5). Its mean
# is 0.25, so the best-fit intercept is about 4.25 rather than 4.
y = 4 + 3 * X + .5 * np.random.rand(1000, 1)
model = LinearRegression()
# X and y are already (1000, 1) column arrays, so no reshape is needed.
model.fit(X, y)

# With a 2-D target, coef_ is 2-D and intercept_ is 1-D; pull out the scalars.
(w, b) = model.coef_[0][0], model.intercept_[0]
print(w, b)

3.007522942065274 4.245681353449078


In [13]:
# Gradient descent on the loss L(w) = 1 / (2N) * ||y - Xbar.dot(w)||_2 ^ 2, whose gradient is grad_w L(w) = 1 / N * Xbar.T.dot(Xbar.dot(w) - y)

# Prepend a column of ones to X so the first weight acts as the intercept.
one = np.ones((X.shape[0], 1))
Xbar = np.hstack((one, X.reshape(-1, 1)))

def grad2(w):
    """Return (1 / N) * Xbar^T (Xbar w - y) using the module-level Xbar and y.

    N is the number of rows of Xbar.
    """
    n_rows = Xbar.shape[0]
    residual = Xbar.dot(w) - y
    scale = 1 / n_rows
    return scale * Xbar.T.dot(residual)

def cost2(w):
    """Return (0.5 / N) * ||y - Xbar w||^2 using the module-level Xbar and y.

    N is the number of rows of Xbar.
    """
    n_rows = Xbar.shape[0]
    residual_norm = np.linalg.norm(y - Xbar.dot(w))
    scale = .5 / n_rows
    return scale * residual_norm ** 2

def myGD(w_init, theta, max_iter=100, tol=1e-3):
    """Run batch gradient descent using the module-level ``grad2``.

    Args:
        w_init: Initial weights (nested list or array accepted by ``grad2``).
        theta: Learning rate (step size) multiplying the gradient.
        max_iter: Maximum number of update steps to attempt.
        tol: Stop once ``norm(grad2(w_new)) / len(w_new)`` falls below this.

    Returns:
        A tuple ``(w, it)``. ``w`` is the list of stored iterates, starting
        with ``w_init``. ``it`` is the zero-based index of the step at which
        the loop stopped (``max_iter - 1`` when the tolerance was never met,
        and 0 when ``max_iter`` is 0).

    Note:
        The candidate that first satisfies the tolerance is not appended, so
        ``w[-1]`` is the last iterate before the stopping test passed.
    """
    w = [w_init]
    it = 0  # keeps the return value defined when max_iter == 0
    # The gradient evaluated for the stopping test is exactly the one the
    # next step needs; reusing it halves the number of grad2 calls.
    g = grad2(w[-1])
    for it in range(max_iter):
        w_new = w[-1] - theta * g
        g = grad2(w_new)
        if np.linalg.norm(g) / len(w_new) < tol:
            break
        w.append(w_new)
    return (w, it)

# Batch gradient descent from the column vector (2, 1) with learning rate 1.
w_init = [[2], [1]]
w1, it1 = myGD(w_init=w_init, theta=1)
print(w1[-1], it1)

[[4.2599633 ]
 [2.98063668]] 46


In [23]:
# Gradient descent with momentum, applied to the linear-regression loss via grad2
def GD_momentum(theta_init, eta, gamma, max_iter=100, tol=1e-3):
    """Run gradient descent with momentum using the module-level ``grad2``.

    Update rule: ``v_new = gamma * v_old + eta * grad2(theta)`` followed by
    ``theta_new = theta - v_new``.

    Args:
        theta_init: Initial parameters (nested list or array).
        eta: Learning rate multiplying the gradient.
        gamma: Momentum coefficient multiplying the previous velocity.
        max_iter: Maximum number of update steps to attempt.
        tol: Stop once ``norm(grad2(theta_new))`` divided by the number of
            parameters falls below this value.

    Returns:
        A tuple ``(theta, it)``. ``theta`` is the list of stored iterates,
        starting with ``theta_init``. ``it`` is the zero-based index of the
        step at which the loop stopped (``max_iter - 1`` when the tolerance
        was never met, and 0 when ``max_iter`` is 0).

    Note:
        The candidate that first satisfies the tolerance is not appended.
    """
    # Keep the full history of theta so the trajectory can be inspected.
    theta = [theta_init]
    n_params = np.array(theta_init).size  # loop-invariant normaliser
    v_old = np.zeros_like(theta_init)
    it = 0  # keeps the return value defined when max_iter == 0
    # The gradient evaluated for the stopping test is the one the next step
    # needs, so carry it forward instead of recomputing it.
    g = grad2(theta[-1])
    for it in range(max_iter):
        v_new = gamma * v_old + eta * g
        theta_new = theta[-1] - v_new
        g = grad2(theta_new)
        if np.linalg.norm(g) / n_params < tol:
            break
        v_old = v_new
        theta.append(theta_new)
    return (theta, it)

# Momentum run from (2, 1) with learning rate 1 and momentum 0.9.
w, it = GD_momentum(theta_init=[[2], [1]], eta=1, gamma=.9)
print(w[-1], it)

[[4.230418  ]
 [2.99448792]] 97


In [30]:
# Nesterov accelerated gradient (NAG): momentum with the gradient evaluated at the look-ahead point
def GD_NAG(theta_init, eta, gamma, max_iter=100, tol=1e-3):
    """Run Nesterov accelerated gradient using the module-level ``grad2``.

    Update rule: ``v_new = gamma * v + eta * grad2(theta - gamma * v)``
    followed by ``theta_new = theta - v_new``.

    Args:
        theta_init: Initial parameters (nested list or array).
        eta: Learning rate multiplying the look-ahead gradient.
        gamma: Momentum coefficient multiplying the previous velocity.
        max_iter: Maximum number of update steps to attempt.
        tol: Stop once ``norm(grad2(theta_new))`` divided by the number of
            parameters falls below this value.

    Returns:
        A tuple ``(theta, it)``. ``theta`` is the list of stored iterates,
        starting with ``theta_init``. ``it`` is the zero-based index of the
        step at which the loop stopped (``max_iter - 1`` when the tolerance
        was never met, and 0 when ``max_iter`` is 0).

    Note:
        The candidate that first satisfies the tolerance is not appended.
    """
    theta = [theta_init]
    n_params = np.array(theta_init).size  # loop-invariant normaliser
    # Only the latest velocity is ever read, so no history list is kept.
    v = np.zeros_like(theta_init)
    it = 0  # keeps the return value defined when max_iter == 0
    for it in range(max_iter):
        lookahead = theta[-1] - gamma * v
        v_new = gamma * v + eta * grad2(lookahead)
        theta_new = theta[-1] - v_new
        if np.linalg.norm(grad2(theta_new)) / n_params < tol:
            break
        theta.append(theta_new)
        v = v_new
    return (theta, it)

# NAG run from (2, 1) with learning rate 0.5 and momentum 0.9.
w, it = GD_NAG(theta_init=[[2], [1]], eta=.5, gamma=.9)
print(w[-1], it)

[[4.22143242]
 [3.05317148]] 26


In [37]:
# Stochastic gradient descent (SGD)
def grad_SGD(xi, yi, w):
    """Return the single-sample gradient xi^T (xi w - yi).

    ``xi`` is reshaped to a single row before use, so the result has one
    entry per feature of ``xi``.
    """
    row = xi.reshape(1, -1)
    residual = row.dot(w) - yi
    return row.T.dot(residual)

def SGD(theta_init, eta, max_iter=100, tol=1e-3):
    """Run stochastic gradient descent over the module-level X, Xbar and y.

    Each pass visits every sample once in a fresh random order and takes one
    step per sample using ``grad_SGD``.

    Args:
        theta_init: Initial parameters (nested list or array).
        eta: Learning rate multiplying the per-sample gradient.
        max_iter: Update budget. It is only checked before each pass, so a
            pass that has started always runs to completion (or until the
            stopping test passes) and the returned count can exceed it.
        tol: Stop once the norm of the current per-sample gradient, divided
            by the number of parameters, falls below this value.

    Returns:
        A tuple ``(theta, it)``. ``theta`` is the list of stored iterates,
        starting with ``theta_init``. ``it`` is the number of samples
        processed, including the one that triggered the stop.

    Note:
        The step computed from the sample that triggers the stop is not
        appended to ``theta``.
    """
    it = 0
    theta = [theta_init]
    n_samples = X.shape[0]
    if n_samples == 0:
        # With no samples the pass below would never advance `it`, so the
        # outer loop would spin forever.
        return (theta, it)
    while it < max_iter:
        order = np.random.choice(n_samples, n_samples, replace=False)
        for k in order:
            it += 1
            # One gradient evaluation serves both the step and the stop test.
            g = grad_SGD(Xbar[k], y[k], theta[-1])
            theta_new = theta[-1] - eta * g
            if np.linalg.norm(g) / np.size(theta_new) < tol:
                return (theta, it)
            theta.append(theta_new)
    return (theta, it)

# SGD run from (2, 1) with learning rate 0.5.
w, it = SGD(theta_init=[[2], [1]], eta=.5)
print(w[-1], it)


[[4.26316537]
 [3.02729627]] 220


In [14]:
# Scratch check: np.zeros_like on a list of ints gives an integer zero array.
t = list(range(1, 4))
print(np.zeros_like(t))

[0 0 0]
